From 9c57249784d52ad82ce593f6c6fda17eee960ee8 Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Tue, 12 Aug 2025 11:32:23 -0700 Subject: [PATCH 01/32] Playing with parser implementation --- Cargo.toml | 11 +- examples/nom.rs | 37 +++++ src/lib.rs | 409 +++++++++++++++++++++++++++--------------------- tests/parse.rs | 136 ++++++++-------- 4 files changed, 344 insertions(+), 249 deletions(-) create mode 100644 examples/nom.rs diff --git a/Cargo.toml b/Cargo.toml index 8146fff..6bd02d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ authors = ["T.J. Telan "] categories = ["parser-implementations", "encoding"] description = "A parser for git repo urls based on url crate" documentation = "https://docs.rs/git-url-parse" -edition = "2021" +edition = "2024" keywords = ["git", "url", "parsing", "normalize"] license = "MIT" name = "git-url-parse" @@ -19,8 +19,11 @@ tracing = ["dep:tracing"] [dependencies] tracing = { version = "0.1", optional = true } url = { version = "2.2" } -strum = { version = "^0.27", features = ["derive"] } -thiserror = "^2.0" +strum = { version = "0.27", features = ["derive"] } +thiserror = "2.0" + +nom = "8.0.0" [dev-dependencies] -env_logger = "^0.11" +env_logger = "0.11" +regex = "1.10" diff --git a/examples/nom.rs b/examples/nom.rs new file mode 100644 index 0000000..e37f962 --- /dev/null +++ b/examples/nom.rs @@ -0,0 +1,37 @@ +use git_url_parse::{GitUrl, GitUrlParseError}; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::multi::many0; +use nom::{IResult, Parser}; + +fn main() -> Result<(), GitUrlParseError> { + env_logger::init(); + + let test_vec = vec![ + "https://github.com/tjtelan/orbitalci.git", + "git@github.com:tjtelan/orbitalci.git", + "https://token:x-oauth-basic@host.xz/path/to/repo.git/", + "https://x-token-auth:token@host.xz/path/to/repo.git/", + "git+ssh://git@some-host.com/and-the-path/name", + "git://some-host.com/and-the-path/name", + "host.tld:user/project-name.git", + "file:///path/to/repo.git/", + 
"~/path/to/repo.git/", + "./path/to/repo.git/", + "./path/to/repo.git", + "../test_repo", + "..\\test_repo", + "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", + "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", + ]; + + for test_url in test_vec { + let parsed = GitUrl::parse(test_url)?; + //println!("leftover:{leftover:#?}, output:{output:#?}"); + //let parsed = GitUrl::parse(test_url)?; + //println!("Original: {}", test_url); + println!("Parsed: {:?}", parsed); + //println!("{:?}\n", parsed); + } + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 773f35d..ee46533 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,13 @@ use strum::{Display, EnumString, VariantNames}; use thiserror::Error; use url::Url; +use nom::branch::alt; +use nom::bytes::complete::{is_not, tag, take_till, take_until, take_while}; +use nom::character::complete::{anychar, char, one_of}; +use nom::multi::many0; +use nom::sequence::{preceded, terminated}; +use nom::{AsBytes, IResult, Parser}; + #[cfg(feature = "tracing")] use tracing::debug; @@ -32,6 +39,31 @@ pub enum Scheme { Unspecified, } +fn scheme(input: &str) -> IResult<&str, &str> { + terminated( + alt(( + tag(Scheme::File.to_string().as_bytes()), + tag(Scheme::Ftps.to_string().as_bytes()), + tag(Scheme::Ftp.to_string().as_bytes()), + tag(Scheme::GitSsh.to_string().as_bytes()), + tag(Scheme::Git.to_string().as_bytes()), + tag(Scheme::Https.to_string().as_bytes()), + tag(Scheme::Http.to_string().as_bytes()), + tag(Scheme::Ssh.to_string().as_bytes()), + )), + tag("://"), + ) + .parse(input) +} + +fn username(input: &str) -> IResult<&str, &str> { + terminated(take_until("@"), tag("@")).parse(input) +} + +fn hostname(input: &str) -> IResult<&str, &str> { + terminated(is_not("/:"), one_of("/:")).parse(input) +} + /// GitUrl represents an input url that is a url used by git /// Internally during parsing the url is sanitized and uses the `url` crate to perform /// the majority of the parsing effort, and 
with some extra handling to expose @@ -40,15 +72,15 @@ pub enum Scheme { pub struct GitUrl { /// The fully qualified domain name (FQDN) or IP of the repo pub host: Option, - /// The name of the repo - pub name: String, - /// The owner/account/project name - pub owner: Option, - /// The organization name. Supported by Azure DevOps - pub organization: Option, - /// The full name of the repo, formatted as "owner/name" - pub fullname: String, - /// The git url scheme + ///// The name of the repo + //pub name: String, + ///// The owner/account/project name + //pub owner: Option, + ///// The organization name. Supported by Azure DevOps + //pub organization: Option, + ///// The full name of the repo, formatted as "owner/name" + //pub fullname: String, + ///// The git url scheme pub scheme: Scheme, /// The authentication user pub user: Option, @@ -120,10 +152,10 @@ impl Default for GitUrl { fn default() -> Self { GitUrl { host: None, - name: "".to_string(), - owner: None, - organization: None, - fullname: "".to_string(), + //name: "".to_string(), + //owner: None, + //organization: None, + //fullname: "".to_string(), scheme: Scheme::Unspecified, user: None, token: None, @@ -155,172 +187,197 @@ impl GitUrl { /// Returns a `Result` after normalizing and parsing `url` for metadata pub fn parse(url: &str) -> Result { - // Normalize the url so we can use Url crate to process ssh urls - let normalized = normalize_url(url)?; - - // Some pre-processing for paths - let scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { - scheme - } else { - return Err(GitUrlParseError::UnsupportedScheme( - normalized.scheme().to_string(), - )); - }; - if normalized.path().is_empty() { - return Err(GitUrlParseError::EmptyPath); - } - - // Normalized ssh urls can always have their first '/' removed - let urlpath = match &scheme { - Scheme::Ssh => { - // At the moment, we're relying on url::Url's parse() behavior to not duplicate - // the leading '/' when we normalize - 
normalized.path()[1..].to_string() - } - _ => normalized.path().to_string(), - }; - - let git_suffix_check = &urlpath.ends_with(".git"); - - // Parse through path for name,owner,organization - // Support organizations for Azure Devops - #[cfg(feature = "tracing")] - debug!("The urlpath: {:?}", &urlpath); - - // Most git services use the path for metadata in the same way, so we're going to separate - // the metadata - // ex. github.com/accountname/reponame - // owner = accountname - // name = reponame - // - // organizations are going to be supported on a per-host basis - let splitpath = &urlpath.rsplit_terminator('/').collect::>(); - - #[cfg(feature = "tracing")] - debug!("rsplit results for metadata: {:?}", splitpath); - - let name = splitpath[0].trim_end_matches(".git").to_string(); + println!("start: {url}"); + let mut giturl = GitUrl::default(); - // TODO: I think here is where we want to update the url pattern identification step.. I want to be able to have a hint that the user can pass + let mut working_url = url; - let (owner, organization, fullname) = match &scheme { - // We're not going to assume anything about metadata from a filepath - Scheme::File => (None::, None::, name.clone()), - _ => { - let mut fullname: Vec<&str> = Vec::new(); - - // TODO: Add support for parsing out orgs from these urls - let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; - //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; - - let host_str = if let Some(host) = normalized.host_str() { - host - } else { - return Err(GitUrlParseError::UnsupportedUrlHostFormat); - }; - - match hosts_w_organization_in_path.contains(&host_str) { - true => { - #[cfg(feature = "tracing")] - debug!("Found a git provider with an org"); - - // The path differs between git:// and https:// schemes - - match &scheme { - // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", - Scheme::Ssh => { - // Organization - fullname.push(splitpath[2]); - // 
Project/Owner name - fullname.push(splitpath[1]); - // Repo name - fullname.push(splitpath[0]); - - ( - Some(splitpath[1].to_string()), - Some(splitpath[2].to_string()), - fullname.join("/"), - ) - } - // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", - Scheme::Https => { - // Organization - fullname.push(splitpath[3]); - // Project/Owner name - fullname.push(splitpath[2]); - // Repo name - fullname.push(splitpath[0]); - - ( - Some(splitpath[2].to_string()), - Some(splitpath[3].to_string()), - fullname.join("/"), - ) - } - - // TODO: I'm not sure if I want to support throwing this error long-term - _ => return Err(GitUrlParseError::UnexpectedScheme), - } - } - false => { - if !url.starts_with("ssh") && splitpath.len() < 2 { - return Err(GitUrlParseError::UnexpectedFormat); - } - - let position = match splitpath.len() { - 0 => return Err(GitUrlParseError::UnexpectedFormat), - 1 => 0, - _ => 1, - }; - - // push owner - fullname.push(splitpath[position]); - // push name - fullname.push(name.as_str()); - - ( - Some(splitpath[position].to_string()), - None::, - fullname.join("/"), - ) - } - } - } - }; + if let Ok((leftover, scheme)) = scheme(working_url) { + println!("leftover: {leftover}, scheme: {scheme:?}"); + giturl.scheme = Scheme::from_str(scheme).expect("Unknown scheme"); + working_url = leftover; + } - let final_host = match scheme { - Scheme::File => None, - _ => normalized.host_str().map(|h| h.to_string()), - }; + if let Ok((leftover, username)) = username(working_url) { + println!("leftover: {leftover}, username: {username:?}"); + giturl.user = Some(username.to_string()); + working_url = leftover; + } - let final_path = match scheme { - Scheme::File => { - if let Some(host) = normalized.host_str() { - format!("{}{}", host, urlpath) - } else { - urlpath - } - } - _ => urlpath, - }; + if let Ok((leftover, hostname)) = hostname(working_url) { + println!("leftover {leftover}, hostname: {hostname}"); + giturl.host = 
Some(hostname.to_string()); + working_url = leftover; + } - Ok(GitUrl { - host: final_host, - name, - owner, - organization, - fullname, - scheme, - user: match normalized.username().to_string().len() { - 0 => None, - _ => Some(normalized.username().to_string()), - }, - token: normalized.password().map(|p| p.to_string()), - port: normalized.port(), - path: final_path, - git_suffix: *git_suffix_check, - scheme_prefix: url.contains("://") || url.starts_with("git:"), - }) + println!(""); + Ok(giturl) + //// Normalize the url so we can use Url crate to process ssh urls + //let normalized = normalize_url(url)?; + + //// Some pre-processing for paths + //let scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { + // scheme + //} else { + // return Err(GitUrlParseError::UnsupportedScheme( + // normalized.scheme().to_string(), + // )); + //}; + //if normalized.path().is_empty() { + // return Err(GitUrlParseError::EmptyPath); + //} + + //// Normalized ssh urls can always have their first '/' removed + //let urlpath = match &scheme { + // Scheme::Ssh => { + // // At the moment, we're relying on url::Url's parse() behavior to not duplicate + // // the leading '/' when we normalize + // normalized.path()[1..].to_string() + // } + // _ => normalized.path().to_string(), + //}; + + //let git_suffix_check = &urlpath.ends_with(".git"); + + //// Parse through path for name,owner,organization + //// Support organizations for Azure Devops + //#[cfg(feature = "tracing")] + //debug!("The urlpath: {:?}", &urlpath); + + //// Most git services use the path for metadata in the same way, so we're going to separate + //// the metadata + //// ex. 
github.com/accountname/reponame + //// owner = accountname + //// name = reponame + //// + //// organizations are going to be supported on a per-host basis + //let splitpath = &urlpath.rsplit_terminator('/').collect::>(); + + //#[cfg(feature = "tracing")] + //debug!("rsplit results for metadata: {:?}", splitpath); + + //let name = splitpath[0].trim_end_matches(".git").to_string(); + + //// TODO: I think here is where we want to update the url pattern identification step.. I want to be able to have a hint that the user can pass + + //let (owner, organization, fullname) = match &scheme { + // // We're not going to assume anything about metadata from a filepath + // Scheme::File => (None::, None::, name.clone()), + // _ => { + // let mut fullname: Vec<&str> = Vec::new(); + + // // TODO: Add support for parsing out orgs from these urls + // let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; + // //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; + + // let host_str = if let Some(host) = normalized.host_str() { + // host + // } else { + // return Err(GitUrlParseError::UnsupportedUrlHostFormat); + // }; + + // match hosts_w_organization_in_path.contains(&host_str) { + // true => { + // #[cfg(feature = "tracing")] + // debug!("Found a git provider with an org"); + + // // The path differs between git:// and https:// schemes + + // match &scheme { + // // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", + // Scheme::Ssh => { + // // Organization + // fullname.push(splitpath[2]); + // // Project/Owner name + // fullname.push(splitpath[1]); + // // Repo name + // fullname.push(splitpath[0]); + + // ( + // Some(splitpath[1].to_string()), + // Some(splitpath[2].to_string()), + // fullname.join("/"), + // ) + // } + // // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", + // Scheme::Https => { + // // Organization + // fullname.push(splitpath[3]); + // // Project/Owner name + // 
fullname.push(splitpath[2]); + // // Repo name + // fullname.push(splitpath[0]); + + // ( + // Some(splitpath[2].to_string()), + // Some(splitpath[3].to_string()), + // fullname.join("/"), + // ) + // } + + // // TODO: I'm not sure if I want to support throwing this error long-term + // _ => return Err(GitUrlParseError::UnexpectedScheme), + // } + // } + // false => { + // if !url.starts_with("ssh") && splitpath.len() < 2 { + // return Err(GitUrlParseError::UnexpectedFormat); + // } + + // let position = match splitpath.len() { + // 0 => return Err(GitUrlParseError::UnexpectedFormat), + // 1 => 0, + // _ => 1, + // }; + + // // push owner + // fullname.push(splitpath[position]); + // // push name + // fullname.push(name.as_str()); + + // ( + // Some(splitpath[position].to_string()), + // None::, + // fullname.join("/"), + // ) + // } + // } + // } + //}; + + //let final_host = match scheme { + // Scheme::File => None, + // _ => normalized.host_str().map(|h| h.to_string()), + //}; + + //let final_path = match scheme { + // Scheme::File => { + // if let Some(host) = normalized.host_str() { + // format!("{}{}", host, urlpath) + // } else { + // urlpath + // } + // } + // _ => urlpath, + //}; + + //Ok(GitUrl { + // host: final_host, + // name, + // owner, + // organization, + // fullname, + // scheme, + // user: match normalized.username().to_string().len() { + // 0 => None, + // _ => Some(normalized.username().to_string()), + // }, + // token: normalized.password().map(|p| p.to_string()), + // port: normalized.port(), + // path: final_path, + // git_suffix: *git_suffix_check, + // scheme_prefix: url.contains("://") || url.starts_with("git:"), + //}) } } @@ -496,9 +553,7 @@ pub enum GitUrlParseError { #[error("No url scheme was found, then failed to normalize as file url.")] FileUrlNormalizeFailedNoScheme, - #[error( - "No url scheme was found, then failed to normalize as file url after adding 'file://'" - )] + #[error("No url scheme was found, then failed to normalize 
as file url after adding 'file://'")] FileUrlNormalizeFailedSchemeAdded, #[error("Git Url not in expected format")] diff --git a/tests/parse.rs b/tests/parse.rs index ae0118c..5f82b67 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -5,10 +5,10 @@ fn ssh_user_ports() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("host.tld".to_string()), - name: "project-name".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/project-name".to_string(), + //name: "project-name".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/project-name".to_string(), scheme: Scheme::Ssh, user: Some("git".to_string()), token: None, @@ -28,10 +28,10 @@ fn https_user_bitbucket() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("bitbucket.org".to_string()), - name: "repo".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/repo".to_string(), + //name: "repo".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/repo".to_string(), scheme: Scheme::Https, user: Some("user".to_string()), token: None, @@ -50,10 +50,10 @@ fn ssh_user_bitbucket() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("bitbucket.org".to_string()), - name: "repo".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/repo".to_string(), + //name: "repo".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/repo".to_string(), scheme: Scheme::Ssh, user: Some("git".to_string()), token: None, @@ -72,10 +72,10 @@ fn https_user_auth_bitbucket() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("bitbucket.org".to_string()), - name: "name".to_string(), - owner: Some("owner".to_string()), - 
organization: None, - fullname: "owner/name".to_string(), + //name: "name".to_string(), + //owner: Some("owner".to_string()), + //organization: None, + //fullname: "owner/name".to_string(), scheme: Scheme::Https, user: Some("x-token-auth".to_string()), token: Some("token".to_string()), @@ -94,10 +94,10 @@ fn https_user_github() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("github.com".to_string()), - name: "repo".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/repo".to_string(), + //name: "repo".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/repo".to_string(), scheme: Scheme::Https, user: Some("user".to_string()), token: None, @@ -116,10 +116,10 @@ fn ssh_user_github() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("github.com".to_string()), - name: "repo".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/repo".to_string(), + //name: "repo".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/repo".to_string(), scheme: Scheme::Ssh, user: Some("git".to_string()), token: None, @@ -138,10 +138,10 @@ fn https_user_auth_github() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("github.com".to_string()), - name: "name".to_string(), - owner: Some("owner".to_string()), - organization: None, - fullname: "owner/name".to_string(), + //name: "name".to_string(), + //owner: Some("owner".to_string()), + //organization: None, + //fullname: "owner/name".to_string(), scheme: Scheme::Https, user: Some("token".to_string()), token: Some("x-oauth-basic".to_string()), @@ -160,10 +160,10 @@ fn ssh_user_azure_devops() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("ssh.dev.azure.com".to_string()), - name: 
"RepoName".to_string(), - owner: Some("ProjectName".to_string()), - organization: Some("CompanyName".to_string()), - fullname: "CompanyName/ProjectName/RepoName".to_string(), + //name: "RepoName".to_string(), + //owner: Some("ProjectName".to_string()), + //organization: Some("CompanyName".to_string()), + //fullname: "CompanyName/ProjectName/RepoName".to_string(), scheme: Scheme::Ssh, user: Some("git".to_string()), token: None, @@ -182,10 +182,10 @@ fn https_user_azure_devops() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("dev.azure.com".to_string()), - name: "repo".to_string(), - owner: Some("project".to_string()), - organization: Some("organization".to_string()), - fullname: "organization/project/repo".to_string(), + //name: "repo".to_string(), + //owner: Some("project".to_string()), + //organization: Some("organization".to_string()), + //fullname: "organization/project/repo".to_string(), scheme: Scheme::Https, user: Some("organization".to_string()), token: None, @@ -204,10 +204,10 @@ fn ftp_user() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("host.tld".to_string()), - name: "project-name".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/project-name".to_string(), + //name: "project-name".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/project-name".to_string(), scheme: Scheme::Ftp, user: Some("git".to_string()), token: None, @@ -226,10 +226,10 @@ fn ftps_user() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("host.tld".to_string()), - name: "project-name".to_string(), - owner: Some("user".to_string()), - organization: None, - fullname: "user/project-name".to_string(), + //name: "project-name".to_string(), + //owner: Some("user".to_string()), + //organization: None, + //fullname: "user/project-name".to_string(), 
scheme: Scheme::Ftps, user: Some("git".to_string()), token: None, @@ -248,10 +248,10 @@ fn relative_unix_path() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: None, - name: "project-name".to_string(), - owner: None, - organization: None, - fullname: "project-name".to_string(), + //name: "project-name".to_string(), + //owner: None, + //organization: None, + //fullname: "project-name".to_string(), scheme: Scheme::File, user: None, token: None, @@ -270,10 +270,10 @@ fn absolute_unix_path() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: None, - name: "project-name".to_string(), - owner: None, - organization: None, - fullname: "project-name".to_string(), + //name: "project-name".to_string(), + //owner: None, + //organization: None, + //fullname: "project-name".to_string(), scheme: Scheme::File, user: None, token: None, @@ -293,10 +293,10 @@ fn relative_windows_path() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: None, - name: "project-name".to_string(), - owner: None, - organization: None, - fullname: "project-name".to_string(), + //name: "project-name".to_string(), + //owner: None, + //organization: None, + //fullname: "project-name".to_string(), scheme: Scheme::File, user: None, token: None, @@ -317,10 +317,10 @@ fn absolute_windows_path() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: None, - name: "project-name".to_string(), - owner: None, - organization: None, - fullname: "project-name".to_string(), + //name: "project-name".to_string(), + //owner: None, + //organization: None, + //fullname: "project-name".to_string(), scheme: Scheme::File, user: None, token: None, @@ -351,10 +351,10 @@ fn ssh_without_organization() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("f589726c3611".to_string()), - name: 
"repo".to_string(), - owner: Some("repo".to_string()), - organization: None, - fullname: "repo/repo".to_string(), + //name: "repo".to_string(), + //owner: Some("repo".to_string()), + //organization: None, + //fullname: "repo/repo".to_string(), scheme: Scheme::Ssh, user: None, token: None, @@ -393,10 +393,10 @@ fn git() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("github.com".to_string()), - name: "name".to_string(), - owner: Some("owner".to_string()), - organization: None, - fullname: "owner/name".to_string(), + //name: "name".to_string(), + //owner: Some("owner".to_string()), + //organization: None, + //fullname: "owner/name".to_string(), scheme: Scheme::Git, user: None, token: None, From 159661e8f12cb93c85bd0c8a34bc0a21cba19ebb Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Wed, 13 Aug 2025 19:09:10 -0700 Subject: [PATCH 02/32] end to end parsing wip Tests aren't passing yet --- examples/nom.rs | 9 +- src/lib.rs | 274 +++++++++++++++++++++++++++++++++++++++------ tests/parse.rs | 53 ++++++--- tests/trim_auth.rs | 6 +- 4 files changed, 281 insertions(+), 61 deletions(-) diff --git a/examples/nom.rs b/examples/nom.rs index e37f962..e47a1a3 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -8,8 +8,10 @@ fn main() -> Result<(), GitUrlParseError> { env_logger::init(); let test_vec = vec![ - "https://github.com/tjtelan/orbitalci.git", - "git@github.com:tjtelan/orbitalci.git", + "https://github.com/tjtelan/git-url-parse-rs.git", + "git@github.com:tjtelan/git-url-parse-rs.git", + "git@hostname:22/path/to/repo.git", + "ssh://git@github.com:22/asdf/asdf.git", "https://token:x-oauth-basic@host.xz/path/to/repo.git/", "https://x-token-auth:token@host.xz/path/to/repo.git/", "git+ssh://git@some-host.com/and-the-path/name", @@ -30,7 +32,8 @@ fn main() -> Result<(), GitUrlParseError> { //println!("leftover:{leftover:#?}, output:{output:#?}"); //let parsed = GitUrl::parse(test_url)?; //println!("Original: {}", 
test_url); - println!("Parsed: {:?}", parsed); + println!("Parsed: {}", parsed); + println!("Parsed: {:#?}", parsed); //println!("{:?}\n", parsed); } Ok(()) diff --git a/src/lib.rs b/src/lib.rs index ee46533..5ba0adb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,17 +5,15 @@ use thiserror::Error; use url::Url; use nom::branch::alt; -use nom::bytes::complete::{is_not, tag, take_till, take_until, take_while}; -use nom::character::complete::{anychar, char, one_of}; -use nom::multi::many0; +use nom::bytes::complete::{tag, take_till, take_until, take_while}; use nom::sequence::{preceded, terminated}; -use nom::{AsBytes, IResult, Parser}; +use nom::{IResult, Parser, combinator::opt}; #[cfg(feature = "tracing")] use tracing::debug; /// Supported uri schemes for parsing -#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display, Copy)] +#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] #[strum(serialize_all = "kebab_case")] pub enum Scheme { /// Represents `file://` url scheme @@ -35,12 +33,24 @@ pub enum Scheme { Https, /// Represents `ssh://` url scheme Ssh, - /// Represents No url scheme - Unspecified, + ///// Represents No url scheme + //Unspecified, + /// + Other(String), // todo: need test for this } -fn scheme(input: &str) -> IResult<&str, &str> { - terminated( +#[derive(Debug, Default, PartialEq, Eq)] +enum GitUrlParseHint { + #[default] + Unknown, + Sshlike, + Filelike, + Httplike, + //Custom // needed? 
+} + +fn scheme(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated( alt(( tag(Scheme::File.to_string().as_bytes()), tag(Scheme::Ftps.to_string().as_bytes()), @@ -50,18 +60,91 @@ fn scheme(input: &str) -> IResult<&str, &str> { tag(Scheme::Https.to_string().as_bytes()), tag(Scheme::Http.to_string().as_bytes()), tag(Scheme::Ssh.to_string().as_bytes()), + // todo: Other(), needs a test )), tag("://"), - ) + )) .parse(input) } -fn username(input: &str) -> IResult<&str, &str> { - terminated(take_until("@"), tag("@")).parse(input) +fn username(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated(take_until("@"), tag("@"))).parse(input) +} + +fn token(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated(take_until(":"), tag(":"))).parse(input) +} + +fn hostname(input: &str) -> IResult<&str, Option<&str>> { + opt(take_till(|c| c == '/' || c == ':')).parse(input) +} + +fn port(input: &str) -> IResult<&str, Option<&str>> { + opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) } -fn hostname(input: &str) -> IResult<&str, &str> { - terminated(is_not("/:"), one_of("/:")).parse(input) +#[derive(Debug, Default)] +struct GitUrlBuilder { + hint: GitUrlParseHint, + scheme: Option, + user: Option, + token: Option, + host: Option, + port: Option, + path: String, + print_scheme: bool, +} + +impl GitUrlBuilder { + fn init(url: &str) -> Result { + // Error if there are null bytes within the url + + // https://github.com/tjtelan/git-url-parse-rs/issues/16 + if url.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes); + } + Ok(GitUrlBuilder::default()) + } + + //fn precheck(&mut self, input: &str) -> Result { + // // Error if there are null bytes within the url + // // https://github.com/tjtelan/git-url-parse-rs/issues/16 + // if input.contains('\0') { + // return Err(GitUrlParseError::FoundNullBytes); + // } + + // // get scheme and user + // // it is a file path if it doesn't have either one of these, but not definitively + 
// // if it has a port, it is not a file path + + // Ok(self) + + //} + + // should I validate that there are values, not empty strings? + fn build(&self) -> GitUrl { + let mut git_url = GitUrl::default(); + + if let Some(scheme) = self.scheme.clone() { + git_url.scheme = Some(scheme); + git_url.print_scheme = self.print_scheme; + } + if let Some(user) = self.user.clone() { + git_url.user = Some(user.clone()); + } + if let Some(token) = self.token.clone() { + git_url.token = Some(token.clone()); + } + if let Some(host) = self.host.clone() { + git_url.host = Some(host.clone()); + } + if let Some(port) = self.port { + git_url.port = Some(port); + } + git_url.path = self.path.clone(); + + git_url + } } /// GitUrl represents an input url that is a url used by git @@ -81,7 +164,7 @@ pub struct GitUrl { ///// The full name of the repo, formatted as "owner/name" //pub fullname: String, ///// The git url scheme - pub scheme: Scheme, + pub scheme: Option, /// The authentication user pub user: Option, /// The oauth token (could appear in the https urls) @@ -94,28 +177,41 @@ pub struct GitUrl { pub git_suffix: bool, /// Indicate if url explicitly uses its scheme pub scheme_prefix: bool, + pub print_scheme: bool, } /// Build the printable GitUrl from its components impl fmt::Display for GitUrl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let scheme_prefix = match self.scheme_prefix { - true => format!("{}://", self.scheme), - false => String::new(), + //let scheme_prefix = match self.scheme_prefix { + // true => format!("{}://", self.scheme), + // false => String::new(), + //}; + + let scheme = if let Some(scheme) = &self.scheme && self.print_scheme { + format!("{}://", scheme) + } else { + String::new() }; + //let scheme_prefix = if self.print_scheme && self.scheme_prefix { + // format!("{}://", self.scheme) + //} else { + // String::new() + //}; + let auth_info = match self.scheme { - Scheme::Ssh | Scheme::Git | Scheme::GitSsh => { + Some(Scheme::Ssh) | 
Some(Scheme::Git) | Some(Scheme::GitSsh) => { if let Some(user) = &self.user { - format!("{}@", user) + format!("{user}@") } else { String::new() } } - Scheme::Http | Scheme::Https => match (&self.user, &self.token) { - (Some(user), Some(token)) => format!("{}:{}@", user, token), - (Some(user), None) => format!("{}@", user), - (None, Some(token)) => format!("{}@", token), + Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user, &self.token) { + (Some(user), Some(token)) => format!("{user}:{token}@"), + (Some(user), None) => format!("{user}@", ), + (None, Some(token)) => format!("{token}@"), (None, None) => String::new(), }, _ => String::new(), @@ -131,18 +227,32 @@ impl fmt::Display for GitUrl { None => String::new(), }; - let path = match &self.scheme { - Scheme::Ssh => { - if self.port.is_some() { + //let path = match &self.scheme { + // Scheme::Ssh => { + // if self.port.is_some() { + // format!("/{}", &self.path) + // } else { + // format!(":{}", &self.path) + // } + // } + // _ => self.path.to_string(), + //}; + + let path = if self.scheme == Some(Scheme::Ssh) { + if self.port.is_some() { + if !self.path.starts_with('/') { format!("/{}", &self.path) } else { - format!(":{}", &self.path) + self.path.to_string() } + } else { + format!(":{}", &self.path) } - _ => self.path.to_string(), + } else { + self.path.to_string() }; - let git_url_str = format!("{}{}{}{}{}", scheme_prefix, auth_info, host, port, path); + let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); write!(f, "{}", git_url_str) } @@ -156,13 +266,14 @@ impl Default for GitUrl { //owner: None, //organization: None, //fullname: "".to_string(), - scheme: Scheme::Unspecified, + scheme: None, user: None, token: None, port: None, path: "".to_string(), git_suffix: false, scheme_prefix: false, + print_scheme: false, } } } @@ -187,31 +298,120 @@ impl GitUrl { /// Returns a `Result` after normalizing and parsing `url` for metadata pub fn parse(url: &str) -> Result { + + + + 
println!("start: {url}"); - let mut giturl = GitUrl::default(); + let mut giturl = GitUrlBuilder::default(); + + // todo: check if ssh url or file url early let mut working_url = url; - if let Ok((leftover, scheme)) = scheme(working_url) { + if let Ok((leftover, Some(scheme))) = scheme(working_url) { println!("leftover: {leftover}, scheme: {scheme:?}"); - giturl.scheme = Scheme::from_str(scheme).expect("Unknown scheme"); + + let s = Scheme::from_str(scheme).expect("Unknown scheme"); + + giturl.scheme = Some(s.clone()); + giturl.print_scheme = true; working_url = leftover; + + giturl.hint = match s { + Scheme::Ssh => GitUrlParseHint::Sshlike, + Scheme::File => GitUrlParseHint::Filelike, + _ => GitUrlParseHint::Httplike, + } } - if let Ok((leftover, username)) = username(working_url) { + if let Ok((leftover, Some(username))) = username(working_url) { println!("leftover: {leftover}, username: {username:?}"); giturl.user = Some(username.to_string()); + working_url = leftover; + + if giturl.hint == GitUrlParseHint::Unknown { + giturl.hint = GitUrlParseHint::Sshlike; + } + + if let Ok((token, Some(real_username))) = token(username) { + println!("token: {token}, real_username: {real_username:?}"); + giturl.user = Some(real_username.to_string()); + giturl.token = Some(token.to_string()); + + if giturl.hint == GitUrlParseHint::Unknown + || giturl.hint == GitUrlParseHint::Sshlike + { + giturl.hint = GitUrlParseHint::Httplike; + } + } } - if let Ok((leftover, hostname)) = hostname(working_url) { + // Sanity check before proceeding + // if we don't have a scheme or user, we don't know if the next component is an host:port or a file path + //match (&giturl.scheme, &giturl.user) { + // (Some(Scheme::Ssh), _) => { + // giturl.hint = GitUrlParseHint::Sshlike + // } + // (Some(Scheme::File), _) => giturl.hint = GitUrlParseHint::Filelike, + // (None, Some(_)) => { + // if let Some(_) = giturl.token { + // giturl.hint = GitUrlParseHint::Httplike; + // } + // } + // (Some(_), _) => 
giturl.hint = GitUrlParseHint::Httplike, + // (None, _) => { + // if !working_url.contains(':') {} + // giturl.hint = GitUrlParseHint::Filelike; + // // Is this the correct assumption? No scheme and no user? + // } + //}; + + //match &giturl.hint { + // GitUrlParseHint::Httplike | GitUrlParseHint::Sshlike => {} + // GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => {} + //} + + let save_state = working_url; + + if let Ok((leftover, Some(hostname))) = hostname(working_url) { println!("leftover {leftover}, hostname: {hostname}"); giturl.host = Some(hostname.to_string()); working_url = leftover; + + if let Ok((leftover, Some(port))) = port(leftover) { + if !port.is_empty() { + println!("leftover {leftover}, port: {port}"); + giturl.port = Some(u16::from_str(port).expect("Not a valid port")); + working_url = leftover; + + if giturl.hint == GitUrlParseHint::Unknown { + giturl.hint = GitUrlParseHint::Httplike; + } + } + } + } + + if giturl.hint == GitUrlParseHint::Sshlike { + if let Some(ssh_path) = working_url.strip_prefix(":") { + working_url = ssh_path; + // This is important for printing the url correctly with the ":" + giturl.scheme = Some(Scheme::Ssh) + } } + if giturl.hint == GitUrlParseHint::Unknown { + working_url = save_state; + giturl.host = None; + giturl.scheme = Some(Scheme::File); + } + + + giturl.path = working_url.to_string(); + println!(""); - Ok(giturl) + Ok(giturl.build()) //// Normalize the url so we can use Url crate to process ssh urls //let normalized = normalize_url(url)?; diff --git a/tests/parse.rs b/tests/parse.rs index 5f82b67..f8fdbb8 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -9,13 +9,14 @@ fn ssh_user_ports() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/project-name".to_string(), - scheme: Scheme::Ssh, + scheme: Some(Scheme::Ssh), user: Some("git".to_string()), token: None, port: Some(9999), path: "user/project-name.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, 
}; assert_eq!(parsed, expected); @@ -32,13 +33,14 @@ fn https_user_bitbucket() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/repo".to_string(), - scheme: Scheme::Https, + scheme: Some(Scheme::Https), user: Some("user".to_string()), token: None, port: None, path: "/user/repo.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -54,13 +56,14 @@ fn ssh_user_bitbucket() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/repo".to_string(), - scheme: Scheme::Ssh, + scheme: Some(Scheme::Ssh), user: Some("git".to_string()), token: None, port: None, path: "user/repo.git".to_string(), git_suffix: true, scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -76,13 +79,14 @@ fn https_user_auth_bitbucket() { //owner: Some("owner".to_string()), //organization: None, //fullname: "owner/name".to_string(), - scheme: Scheme::Https, + scheme: Some(Scheme::Https), user: Some("x-token-auth".to_string()), token: Some("token".to_string()), port: None, path: "/owner/name.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -98,13 +102,14 @@ fn https_user_github() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/repo".to_string(), - scheme: Scheme::Https, + scheme: Some(Scheme::Https), user: Some("user".to_string()), token: None, port: None, path: "/user/repo.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -120,13 +125,14 @@ fn ssh_user_github() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/repo".to_string(), - scheme: Scheme::Ssh, + scheme: Some(Scheme::Ssh), user: Some("git".to_string()), token: None, port: None, path: "user/repo.git".to_string(), git_suffix: true, scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -142,13 +148,14 
@@ fn https_user_auth_github() { //owner: Some("owner".to_string()), //organization: None, //fullname: "owner/name".to_string(), - scheme: Scheme::Https, + scheme: Some(Scheme::Https), user: Some("token".to_string()), token: Some("x-oauth-basic".to_string()), port: None, path: "/owner/name.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -164,13 +171,14 @@ fn ssh_user_azure_devops() { //owner: Some("ProjectName".to_string()), //organization: Some("CompanyName".to_string()), //fullname: "CompanyName/ProjectName/RepoName".to_string(), - scheme: Scheme::Ssh, + scheme: Some(Scheme::Ssh), user: Some("git".to_string()), token: None, port: None, path: "v3/CompanyName/ProjectName/RepoName".to_string(), git_suffix: false, scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -186,13 +194,14 @@ fn https_user_azure_devops() { //owner: Some("project".to_string()), //organization: Some("organization".to_string()), //fullname: "organization/project/repo".to_string(), - scheme: Scheme::Https, + scheme: Some(Scheme::Https), user: Some("organization".to_string()), token: None, port: None, path: "/organization/project/_git/repo".to_string(), git_suffix: false, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -208,13 +217,14 @@ fn ftp_user() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/project-name".to_string(), - scheme: Scheme::Ftp, + scheme: Some(Scheme::Ftp), user: Some("git".to_string()), token: None, port: None, path: "/user/project-name.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -230,13 +240,14 @@ fn ftps_user() { //owner: Some("user".to_string()), //organization: None, //fullname: "user/project-name".to_string(), - scheme: Scheme::Ftps, + scheme: Some(Scheme::Ftps), user: Some("git".to_string()), token: None, port: None, path: 
"/user/project-name.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -252,13 +263,14 @@ fn relative_unix_path() { //owner: None, //organization: None, //fullname: "project-name".to_string(), - scheme: Scheme::File, + scheme: Some(Scheme::File), user: None, token: None, port: None, path: "../project-name.git".to_string(), git_suffix: true, scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -274,13 +286,14 @@ fn absolute_unix_path() { //owner: None, //organization: None, //fullname: "project-name".to_string(), - scheme: Scheme::File, + scheme: Some(Scheme::File), user: None, token: None, port: None, path: "/path/to/project-name.git".to_string(), git_suffix: true, scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -297,13 +310,14 @@ fn relative_windows_path() { //owner: None, //organization: None, //fullname: "project-name".to_string(), - scheme: Scheme::File, + scheme: Some(Scheme::File), user: None, token: None, port: None, path: "../project-name.git".to_string(), git_suffix: true, scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -321,13 +335,14 @@ fn absolute_windows_path() { //owner: None, //organization: None, //fullname: "project-name".to_string(), - scheme: Scheme::File, + scheme: Some(Scheme::File), user: None, token: None, port: None, path: "c:\\project-name.git".to_string(), git_suffix: true, - scheme_prefix: true, + scheme_prefix: false, + print_scheme: false, }; assert_eq!(parsed, expected); @@ -355,13 +370,14 @@ fn ssh_without_organization() { //owner: Some("repo".to_string()), //organization: None, //fullname: "repo/repo".to_string(), - scheme: Scheme::Ssh, + scheme: Some(Scheme::Ssh), user: None, token: None, port: Some(29418), path: "repo".to_string(), git_suffix: false, scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); @@ -397,13 +413,14 @@ fn git() { //owner: 
Some("owner".to_string()), //organization: None, //fullname: "owner/name".to_string(), - scheme: Scheme::Git, + scheme: Some(Scheme::Git), user: None, token: None, port: None, path: "/owner/name.git".to_string(), git_suffix: true, scheme_prefix: true, + print_scheme: false, }; assert_eq!(parsed, expected); diff --git a/tests/trim_auth.rs b/tests/trim_auth.rs index 327db3e..083dbba 100644 --- a/tests/trim_auth.rs +++ b/tests/trim_auth.rs @@ -40,7 +40,7 @@ fn https_user_auth_bitbucket() { let parsed_and_trimmed = GitUrl::parse(test_url) .expect("URL parse failed") .trim_auth(); - let expected = "https://bitbucket.org/owner/name.git"; + let expected = "https://bitbucket.org/owner/name.git/"; assert_eq!(format!("{}", parsed_and_trimmed), expected); } @@ -51,7 +51,7 @@ fn https_user_github() { let parsed_and_trimmed = GitUrl::parse(test_url) .expect("URL parse failed") .trim_auth(); - let expected = "https://github.com/user/repo.git"; + let expected = "https://github.com/user/repo.git/"; assert_eq!(format!("{}", parsed_and_trimmed), expected); } @@ -73,7 +73,7 @@ fn https_user_auth_github() { let parsed_and_trimmed = GitUrl::parse(test_url) .expect("URL parse failed") .trim_auth(); - let expected = "https://github.com/owner/name.git"; + let expected = "https://github.com/owner/name.git/"; assert_eq!(format!("{}", parsed_and_trimmed), expected); } From 103f52943640cd8df13a6751975dfdd720c0924e Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Wed, 13 Aug 2025 21:53:46 -0700 Subject: [PATCH 03/32] More tests passing --- src/lib.rs | 115 +++++++++++++++++++-------------------- tests/parse.rs | 142 +++++++++++++++++++++++++------------------------ 2 files changed, 129 insertions(+), 128 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5ba0adb..c0ab2ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,8 +6,9 @@ use url::Url; use nom::branch::alt; use nom::bytes::complete::{tag, take_till, take_until, take_while}; +use nom::character::complete::one_of; use nom::sequence::{preceded, terminated}; -use nom::{IResult, Parser, combinator::opt}; +use nom::{IResult, Parser, combinator::opt, combinator::rest}; #[cfg(feature = "tracing")] use tracing::debug; @@ -46,7 +47,7 @@ enum GitUrlParseHint { Sshlike, Filelike, Httplike, - //Custom // needed? + //Custom // needed? } fn scheme(input: &str) -> IResult<&str, Option<&str>> { @@ -83,6 +84,16 @@ fn port(input: &str) -> IResult<&str, Option<&str>> { opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) } +// This is making an assumption that the path is relative, not absolute +// This is bc we do not support absolute paths when we also have a port +fn ssh_path(input: &str) -> IResult<&str, Option<&str>> { + opt(preceded(one_of("/:"), rest)).parse(input) +} + +fn path(input: &str) -> IResult<&str, &str> { + rest(input) +} + #[derive(Debug, Default)] struct GitUrlBuilder { hint: GitUrlParseHint, @@ -93,11 +104,15 @@ struct GitUrlBuilder { port: Option, path: String, print_scheme: bool, + //working_url: String, } impl GitUrlBuilder { fn init(url: &str) -> Result { - // Error if there are null bytes within the url + #[cfg(feature = "tracing")] + debug!("Processing: {:?}", &url); + + // Error if there are null bytes within the url // https://github.com/tjtelan/git-url-parse-rs/issues/16 if url.contains('\0') { @@ -106,20 +121,7 @@ impl GitUrlBuilder { Ok(GitUrlBuilder::default()) } - //fn precheck(&mut self, input: &str) -> Result { - 
// // Error if there are null bytes within the url - // // https://github.com/tjtelan/git-url-parse-rs/issues/16 - // if input.contains('\0') { - // return Err(GitUrlParseError::FoundNullBytes); - // } - - // // get scheme and user - // // it is a file path if it doesn't have either one of these, but not definitively - // // if it has a port, it is not a file path - - // Ok(self) - - //} + //fn parse_scheme(&mut self, input: &str) -> Result // should I validate that there are values, not empty strings? fn build(&self) -> GitUrl { @@ -173,10 +175,10 @@ pub struct GitUrl { pub port: Option, /// The path to repo w/ respect to user + hostname pub path: String, - /// Indicate if url uses the .git suffix - pub git_suffix: bool, - /// Indicate if url explicitly uses its scheme - pub scheme_prefix: bool, + ///// Indicate if url uses the .git suffix + //pub git_suffix: bool, + ///// Indicate if url explicitly uses its scheme + //pub scheme_prefix: bool, pub print_scheme: bool, } @@ -188,7 +190,9 @@ impl fmt::Display for GitUrl { // false => String::new(), //}; - let scheme = if let Some(scheme) = &self.scheme && self.print_scheme { + let scheme = if let Some(scheme) = &self.scheme + && self.print_scheme + { format!("{}://", scheme) } else { String::new() @@ -210,7 +214,7 @@ impl fmt::Display for GitUrl { } Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user, &self.token) { (Some(user), Some(token)) => format!("{user}:{token}@"), - (Some(user), None) => format!("{user}@", ), + (Some(user), None) => format!("{user}@",), (None, Some(token)) => format!("{token}@"), (None, None) => String::new(), }, @@ -271,8 +275,8 @@ impl Default for GitUrl { token: None, port: None, path: "".to_string(), - git_suffix: false, - scheme_prefix: false, + //git_suffix: false, + //scheme_prefix: false, print_scheme: false, } } @@ -296,16 +300,24 @@ impl GitUrl { new_giturl } - /// Returns a `Result` after normalizing and parsing `url` for metadata - pub fn parse(url: &str) -> Result { - + 
fn build_init(url: &str) -> Result { + #[cfg(feature = "tracing")] + debug!("Processing: {:?}", &url); + // TODO: Should this be extended to check for any whitespace? + // Error if there are null bytes within the url + // https://github.com/tjtelan/git-url-parse-rs/issues/16 + if url.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes); + } + Ok(GitUrlBuilder::default()) + } + /// Returns a `Result` after normalizing and parsing `url` for metadata + pub fn parse(url: &str) -> Result { println!("start: {url}"); - let mut giturl = GitUrlBuilder::default(); - - // todo: check if ssh url or file url early + let mut giturl = GitUrlBuilder::init(url)?; let mut working_url = url; @@ -348,31 +360,6 @@ impl GitUrl { } } - // Sanity check before proceeding - // if we don't have a scheme or user, we don't know if the next component is an host:port or a file path - //match (&giturl.scheme, &giturl.user) { - // (Some(Scheme::Ssh), _) => { - // giturl.hint = GitUrlParseHint::Sshlike - // } - // (Some(Scheme::File), _) => giturl.hint = GitUrlParseHint::Filelike, - // (None, Some(_)) => { - // if let Some(_) = giturl.token { - // giturl.hint = GitUrlParseHint::Httplike; - // } - // } - // (Some(_), _) => giturl.hint = GitUrlParseHint::Httplike, - // (None, _) => { - // if !working_url.contains(':') {} - // giturl.hint = GitUrlParseHint::Filelike; - // // Is this the correct assumption? No scheme and no user? 
- // } - //}; - - //match &giturl.hint { - // GitUrlParseHint::Httplike | GitUrlParseHint::Sshlike => {} - // GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => {} - //} - let save_state = working_url; if let Ok((leftover, Some(hostname))) = hostname(working_url) { @@ -394,8 +381,12 @@ impl GitUrl { } if giturl.hint == GitUrlParseHint::Sshlike { - if let Some(ssh_path) = working_url.strip_prefix(":") { - working_url = ssh_path; + // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports + //if let Some(ssh_path) = working_url.strip_prefix(":") { + //} + + if let Ok((_leftover, Some(path))) = ssh_path(working_url) { + working_url = path; // This is important for printing the url correctly with the ":" giturl.scheme = Some(Scheme::Ssh) } @@ -407,8 +398,14 @@ impl GitUrl { giturl.scheme = Some(Scheme::File); } + if let Ok((leftover, path)) = path(working_url) { + println!("leftover {leftover}, path: {path}"); + if path.is_empty() { + return Err(GitUrlParseError::EmptyPath); + } - giturl.path = working_url.to_string(); + giturl.path = path.to_string(); + } println!(""); Ok(giturl.build()) diff --git a/tests/parse.rs b/tests/parse.rs index f8fdbb8..800e361 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -14,8 +14,8 @@ fn ssh_user_ports() { token: None, port: Some(9999), path: "user/project-name.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -38,8 +38,8 @@ fn https_user_bitbucket() { token: None, port: None, path: "/user/repo.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -61,8 +61,8 @@ fn ssh_user_bitbucket() { token: None, port: None, path: "user/repo.git".to_string(), - git_suffix: true, - scheme_prefix: false, + //git_suffix: true, + //scheme_prefix: false, print_scheme: false, }; @@ -84,8 +84,8 @@ fn https_user_auth_bitbucket() { token: 
Some("token".to_string()), port: None, path: "/owner/name.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -107,8 +107,8 @@ fn https_user_github() { token: None, port: None, path: "/user/repo.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -130,8 +130,8 @@ fn ssh_user_github() { token: None, port: None, path: "user/repo.git".to_string(), - git_suffix: true, - scheme_prefix: false, + //git_suffix: true, + //scheme_prefix: false, print_scheme: false, }; @@ -153,8 +153,8 @@ fn https_user_auth_github() { token: Some("x-oauth-basic".to_string()), port: None, path: "/owner/name.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -176,8 +176,8 @@ fn ssh_user_azure_devops() { token: None, port: None, path: "v3/CompanyName/ProjectName/RepoName".to_string(), - git_suffix: false, - scheme_prefix: false, + //git_suffix: false, + //scheme_prefix: false, print_scheme: false, }; @@ -199,8 +199,8 @@ fn https_user_azure_devops() { token: None, port: None, path: "/organization/project/_git/repo".to_string(), - git_suffix: false, - scheme_prefix: true, + //git_suffix: false, + //scheme_prefix: true, print_scheme: true, }; @@ -222,8 +222,8 @@ fn ftp_user() { token: None, port: None, path: "/user/project-name.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -245,8 +245,8 @@ fn ftps_user() { token: None, port: None, path: "/user/project-name.git".to_string(), - git_suffix: true, - scheme_prefix: true, + //git_suffix: true, + //scheme_prefix: true, print_scheme: true, }; @@ -268,8 +268,8 @@ fn relative_unix_path() { token: None, port: None, path: "../project-name.git".to_string(), - git_suffix: true, - scheme_prefix: false, + //git_suffix: true, + 
//scheme_prefix: false, print_scheme: false, }; @@ -291,8 +291,8 @@ fn absolute_unix_path() { token: None, port: None, path: "/path/to/project-name.git".to_string(), - git_suffix: true, - scheme_prefix: false, + //git_suffix: true, + //scheme_prefix: false, print_scheme: false, }; @@ -315,14 +315,15 @@ fn relative_windows_path() { token: None, port: None, path: "../project-name.git".to_string(), - git_suffix: true, - scheme_prefix: false, + //git_suffix: true, + //scheme_prefix: false, print_scheme: false, }; assert_eq!(parsed, expected); } +// Can I use `typed-path` to deal with this? // Issue #7 - Absolute Windows paths will not parse at all #[should_panic(expected = "URL parse failed: UnexpectedFormat")] #[test] @@ -340,54 +341,56 @@ fn absolute_windows_path() { token: None, port: None, path: "c:\\project-name.git".to_string(), - git_suffix: true, - scheme_prefix: false, + //git_suffix: true, + //scheme_prefix: false, print_scheme: false, }; assert_eq!(parsed, expected); } -#[test] -fn ssh_user_path_not_acctname_reponame_format() { - let test_url = "git@test.com:repo"; - let e = GitUrl::parse(test_url); - - assert!(e.is_err()); - assert_eq!( - format!("{}", e.err().unwrap()), - "Git Url not in expected format" - ); -} - -#[test] -fn ssh_without_organization() { - let test_url = "ssh://f589726c3611:29418/repo"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrl { - host: Some("f589726c3611".to_string()), - //name: "repo".to_string(), - //owner: Some("repo".to_string()), - //organization: None, - //fullname: "repo/repo".to_string(), - scheme: Some(Scheme::Ssh), - user: None, - token: None, - port: Some(29418), - path: "repo".to_string(), - git_suffix: false, - scheme_prefix: true, - print_scheme: true, - }; - - assert_eq!(parsed, expected); -} +// Move test +//#[test] +//fn ssh_user_path_not_acctname_reponame_format() { +// let test_url = "git@test.com:repo"; +// let e = GitUrl::parse(test_url); +// +// assert!(e.is_err()); 
+// assert_eq!( +// format!("{}", e.err().unwrap()), +// "Git Url not in expected format" +// ); +//} + +// Move test +//#[test] +//fn ssh_without_organization() { +// let test_url = "ssh://f589726c3611:29418/repo"; +// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +// let expected = GitUrl { +// host: Some("f589726c3611".to_string()), +// //name: "repo".to_string(), +// //owner: Some("repo".to_string()), +// //organization: None, +// //fullname: "repo/repo".to_string(), +// scheme: Some(Scheme::Ssh), +// user: None, +// token: None, +// port: Some(29418), +// path: "repo".to_string(), +// //git_suffix: false, +// //scheme_prefix: true, +// print_scheme: true, +// }; +// +// assert_eq!(parsed, expected); +//} #[test] fn empty_path() { assert_eq!( GitUrlParseError::EmptyPath, - GitUrl::parse("git:").unwrap_err() + GitUrl::parse("file://").unwrap_err() ) } @@ -403,9 +406,10 @@ fn bad_port_number() { ); } +// This test might not have a use anymore if we're not expanding "git:" -> "git://" #[test] fn git() { - let test_url = "git:github.com/owner/name.git"; + let test_url = "git://github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrl { host: Some("github.com".to_string()), @@ -418,9 +422,9 @@ fn git() { token: None, port: None, path: "/owner/name.git".to_string(), - git_suffix: true, - scheme_prefix: true, - print_scheme: false, + //git_suffix: true, + //scheme_prefix: true, + print_scheme: true, }; assert_eq!(parsed, expected); From 6d8301b467ee69676e43ba1f2d277febd115014a Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Wed, 13 Aug 2025 23:35:37 -0700 Subject: [PATCH 04/32] Moving around code Parse code is a little neater and high level --- src/lib.rs | 315 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 177 insertions(+), 138 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c0ab2ec..eb640b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,7 +40,7 @@ pub enum Scheme { Other(String), // todo: need test for this } -#[derive(Debug, Default, PartialEq, Eq)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] enum GitUrlParseHint { #[default] Unknown, @@ -50,51 +50,7 @@ enum GitUrlParseHint { //Custom // needed? } -fn scheme(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated( - alt(( - tag(Scheme::File.to_string().as_bytes()), - tag(Scheme::Ftps.to_string().as_bytes()), - tag(Scheme::Ftp.to_string().as_bytes()), - tag(Scheme::GitSsh.to_string().as_bytes()), - tag(Scheme::Git.to_string().as_bytes()), - tag(Scheme::Https.to_string().as_bytes()), - tag(Scheme::Http.to_string().as_bytes()), - tag(Scheme::Ssh.to_string().as_bytes()), - // todo: Other(), needs a test - )), - tag("://"), - )) - .parse(input) -} - -fn username(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated(take_until("@"), tag("@"))).parse(input) -} - -fn token(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated(take_until(":"), tag(":"))).parse(input) -} - -fn hostname(input: &str) -> IResult<&str, Option<&str>> { - opt(take_till(|c| c == '/' || c == ':')).parse(input) -} - -fn port(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) -} - -// This is making an assumption that the path is relative, not absolute -// This is bc we do not support absolute paths when we also have a port -fn ssh_path(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(one_of("/:"), rest)).parse(input) -} - -fn path(input: &str) -> IResult<&str, &str> { - rest(input) -} - -#[derive(Debug, 
Default)] +#[derive(Debug, Default, Clone)] struct GitUrlBuilder { hint: GitUrlParseHint, scheme: Option, @@ -147,6 +103,50 @@ impl GitUrlBuilder { git_url } + + fn scheme(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated( + alt(( + tag(Scheme::File.to_string().as_bytes()), + tag(Scheme::Ftps.to_string().as_bytes()), + tag(Scheme::Ftp.to_string().as_bytes()), + tag(Scheme::GitSsh.to_string().as_bytes()), + tag(Scheme::Git.to_string().as_bytes()), + tag(Scheme::Https.to_string().as_bytes()), + tag(Scheme::Http.to_string().as_bytes()), + tag(Scheme::Ssh.to_string().as_bytes()), + // todo: Other(), needs a test + )), + tag("://"), + )) + .parse(input) + } + + fn username(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated(take_until("@"), tag("@"))).parse(input) + } + + fn token(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated(take_until(":"), tag(":"))).parse(input) + } + + fn hostname(input: &str) -> IResult<&str, Option<&str>> { + opt(take_till(|c| c == '/' || c == ':')).parse(input) + } + + fn port(input: &str) -> IResult<&str, Option<&str>> { + opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) + } + + // This is making an assumption that the path is relative, not absolute + // This is bc we do not support absolute paths when we also have a port + fn ssh_path(input: &str) -> IResult<&str, Option<&str>> { + opt(preceded(one_of("/:"), rest)).parse(input) + } + + fn path(input: &str) -> IResult<&str, &str> { + rest(input) + } } /// GitUrl represents an input url that is a url used by git @@ -300,112 +300,32 @@ impl GitUrl { new_giturl } - fn build_init(url: &str) -> Result { - #[cfg(feature = "tracing")] - debug!("Processing: {:?}", &url); - - // TODO: Should this be extended to check for any whitespace? 
- // Error if there are null bytes within the url - // https://github.com/tjtelan/git-url-parse-rs/issues/16 - if url.contains('\0') { - return Err(GitUrlParseError::FoundNullBytes); - } - - Ok(GitUrlBuilder::default()) - } - /// Returns a `Result` after normalizing and parsing `url` for metadata pub fn parse(url: &str) -> Result { println!("start: {url}"); let mut giturl = GitUrlBuilder::init(url)?; - let mut working_url = url; - if let Ok((leftover, Some(scheme))) = scheme(working_url) { - println!("leftover: {leftover}, scheme: {scheme:?}"); - - let s = Scheme::from_str(scheme).expect("Unknown scheme"); - - giturl.scheme = Some(s.clone()); - giturl.print_scheme = true; - working_url = leftover; - - giturl.hint = match s { - Scheme::Ssh => GitUrlParseHint::Sshlike, - Scheme::File => GitUrlParseHint::Filelike, - _ => GitUrlParseHint::Httplike, - } - } - - if let Ok((leftover, Some(username))) = username(working_url) { - println!("leftover: {leftover}, username: {username:?}"); - giturl.user = Some(username.to_string()); - - working_url = leftover; - - if giturl.hint == GitUrlParseHint::Unknown { - giturl.hint = GitUrlParseHint::Sshlike; - } - - if let Ok((token, Some(real_username))) = token(username) { - println!("token: {token}, real_username: {real_username:?}"); - giturl.user = Some(real_username.to_string()); - giturl.token = Some(token.to_string()); - - if giturl.hint == GitUrlParseHint::Unknown - || giturl.hint == GitUrlParseHint::Sshlike - { - giturl.hint = GitUrlParseHint::Httplike; - } - } - } + (giturl, working_url) = parse_scheme(giturl, &working_url); + (giturl, working_url) = parse_auth_info(giturl, working_url); let save_state = working_url; - if let Ok((leftover, Some(hostname))) = hostname(working_url) { - println!("leftover {leftover}, hostname: {hostname}"); - giturl.host = Some(hostname.to_string()); - working_url = leftover; + (giturl, working_url) = parse_host_port(giturl, working_url); - if let Ok((leftover, Some(port))) = port(leftover) { - 
if !port.is_empty() { - println!("leftover {leftover}, port: {port}"); - giturl.port = Some(u16::from_str(port).expect("Not a valid port")); - working_url = leftover; - - if giturl.hint == GitUrlParseHint::Unknown { - giturl.hint = GitUrlParseHint::Httplike; - } - } + match giturl.hint { + GitUrlParseHint::Httplike => {} + GitUrlParseHint::Sshlike => { + (giturl, working_url) = parse_ssh_path(giturl, working_url); } - } - - if giturl.hint == GitUrlParseHint::Sshlike { - // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports - //if let Some(ssh_path) = working_url.strip_prefix(":") { - //} - - if let Ok((_leftover, Some(path))) = ssh_path(working_url) { - working_url = path; - // This is important for printing the url correctly with the ":" - giturl.scheme = Some(Scheme::Ssh) + GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { + working_url = save_state; + giturl.host = None; + giturl.scheme = Some(Scheme::File); } } - if giturl.hint == GitUrlParseHint::Unknown { - working_url = save_state; - giturl.host = None; - giturl.scheme = Some(Scheme::File); - } - - if let Ok((leftover, path)) = path(working_url) { - println!("leftover {leftover}, path: {path}"); - if path.is_empty() { - return Err(GitUrlParseError::EmptyPath); - } - - giturl.path = path.to_string(); - } + (giturl, _) = parse_path(giturl, working_url)?; println!(""); Ok(giturl.build()) @@ -578,6 +498,125 @@ impl GitUrl { } } +fn parse_scheme<'a>(giturl:GitUrlBuilder, working_url: &'a str) -> (GitUrlBuilder, &'a str) { + let mut builder = giturl.clone(); + + if let Ok((leftover, Some(s))) = GitUrlBuilder::scheme(working_url) { + println!("leftover: {leftover}, scheme: {s:?}"); + + let scheme = Scheme::from_str(s).expect("Unknown scheme"); + + builder.hint = match &scheme { + Scheme::Ssh => GitUrlParseHint::Sshlike, + Scheme::File => GitUrlParseHint::Filelike, + _ => GitUrlParseHint::Httplike, + }; + + builder.scheme = Some(scheme); + builder.print_scheme = 
true; + //working_url = leftover; + + (builder, leftover) + } else { + (builder, working_url) + } +} + +fn parse_auth_info<'a>( + giturl: GitUrlBuilder, + working_url: &'a str, +) -> (GitUrlBuilder, &'a str) { + + let mut builder = giturl.clone(); + if let Ok((leftover, Some(username))) = GitUrlBuilder::username(working_url) { + println!("leftover: {leftover}, username: {username:?}"); + builder.user = Some(username.to_string()); + + //working_url = leftover; + + if builder.hint == GitUrlParseHint::Unknown { + builder.hint = GitUrlParseHint::Sshlike; + } + + if let Ok((token, Some(real_username))) = GitUrlBuilder::token(username) { + println!("token: {token}, real_username: {real_username:?}"); + builder.user = Some(real_username.to_string()); + builder.token = Some(token.to_string()); + + if builder.hint == GitUrlParseHint::Unknown || builder.hint == GitUrlParseHint::Sshlike { + builder.hint = GitUrlParseHint::Httplike; + } + } + + (builder, leftover) + } else { + (builder, working_url) + } +} +fn parse_host_port<'a>( + giturl: GitUrlBuilder, + working_url: &'a str, +) -> (GitUrlBuilder, &'a str) { + let mut builder = giturl.clone(); + let mut save = working_url; + + if let Ok((leftover, Some(hostname))) = GitUrlBuilder::hostname(working_url) { + println!("leftover {leftover}, hostname: {hostname}"); + builder.host = Some(hostname.to_string()); + //working_url = leftover; + save = leftover; + } + + if let Ok((leftover, Some(port))) = GitUrlBuilder::port(save) { + if !port.is_empty() { + println!("leftover {leftover}, port: {port}"); + builder.port = Some(u16::from_str(port).expect("Not a valid port")); + //working_url = leftover; + save = leftover; + + if builder.hint == GitUrlParseHint::Unknown { + builder.hint = GitUrlParseHint::Httplike; + } + } + } + + (builder, save) +} + +fn parse_ssh_path<'a>(giturl: GitUrlBuilder, working_url: &'a str) -> (GitUrlBuilder, &'a str) { + + let mut builder = giturl.clone(); + // https://mslinn.com/git/040-git-urls.html - we 
only support relative paths when we have ports + //if let Some(ssh_path) = working_url.strip_prefix(":") { + //} + + if let Ok((_leftover, Some(path))) = GitUrlBuilder::ssh_path(working_url) { + //working_url = path; + // This is important for printing the url correctly with the ":" + builder.scheme = Some(Scheme::Ssh); + (builder, path) + } else { + (builder, working_url) + } +} + +fn parse_path<'a>( + giturl: GitUrlBuilder, + working_url: &'a str, +) -> Result<(GitUrlBuilder, &'a str), GitUrlParseError> { + let mut builder = giturl.clone(); + if let Ok((leftover, path)) = GitUrlBuilder::path(working_url) { + println!("leftover {leftover}, path: {path}"); + if path.is_empty() { + return Err(GitUrlParseError::EmptyPath); + } + + builder.path = path.to_string(); + Ok((builder.clone(), leftover)) + } else { + Ok((builder.clone(), working_url)) + } +} /// `normalize_ssh_url` takes in an ssh url that separates the login info /// from the path into with a `:` and replaces it with `/`. /// From 3fc6f3496a56551c0695692e0f043a414119e0df Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Sat, 16 Aug 2025 12:06:55 -0700 Subject: [PATCH 05/32] Moved into derive_builder --- Cargo.toml | 7 +- examples/multi.rs | 2 +- examples/nom.rs | 2 +- examples/trim_auth.rs | 2 +- src/lib.rs | 899 ++++++++++++++++++++++++++---------------- tests/mod.rs | 2 +- tests/normalize.rs | 369 ++++++++--------- tests/parse.rs | 814 +++++++++++++++++++------------------- 8 files changed, 1162 insertions(+), 935 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6bd02d1..064e24e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,11 +18,12 @@ tracing = ["dep:tracing"] [dependencies] tracing = { version = "0.1", optional = true } -url = { version = "2.2" } +#url = { version = "2.2" } strum = { version = "0.27", features = ["derive"] } -thiserror = "2.0" +thiserror = "2" -nom = "8.0.0" +nom = "8" +derive_builder = "0.20" [dev-dependencies] env_logger = "0.11" diff --git a/examples/multi.rs b/examples/multi.rs index 529913f..7fde7bf 100644 --- a/examples/multi.rs +++ b/examples/multi.rs @@ -24,7 +24,7 @@ fn main() -> Result<(), GitUrlParseError> { ]; for test_url in test_vec { - let parsed = GitUrl::parse(test_url)?; + let parsed = GitUrl::parse(test_url).unwrap(); println!("Original: {}", test_url); println!("Parsed: {}", parsed); println!("{:?}\n", parsed); diff --git a/examples/nom.rs b/examples/nom.rs index e47a1a3..f4e505a 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -28,7 +28,7 @@ fn main() -> Result<(), GitUrlParseError> { ]; for test_url in test_vec { - let parsed = GitUrl::parse(test_url)?; + let parsed = GitUrl::parse(test_url).unwrap(); //println!("leftover:{leftover:#?}, output:{output:#?}"); //let parsed = GitUrl::parse(test_url)?; //println!("Original: {}", test_url); diff --git a/examples/trim_auth.rs b/examples/trim_auth.rs index f51f8c9..a685035 100644 --- a/examples/trim_auth.rs +++ b/examples/trim_auth.rs @@ -25,7 +25,7 @@ fn main() -> Result<(), GitUrlParseError> { println!("Original: {}", test_url); println!( "Parsed + Trimmed: {}\n", - 
GitUrl::parse(test_url)?.trim_auth() + GitUrl::parse(test_url).unwrap().trim_auth() ); } Ok(()) diff --git a/src/lib.rs b/src/lib.rs index eb640b5..d9857d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,10 @@ +use core::str; +use derive_builder::Builder; use std::fmt; use std::str::FromStr; use strum::{Display, EnumString, VariantNames}; use thiserror::Error; -use url::Url; +//use url::Url; use nom::branch::alt; use nom::bytes::complete::{tag, take_till, take_until, take_while}; @@ -51,7 +53,7 @@ enum GitUrlParseHint { } #[derive(Debug, Default, Clone)] -struct GitUrlBuilder { +struct GitUrlBuilderOld { hint: GitUrlParseHint, scheme: Option, user: Option, @@ -64,47 +66,239 @@ struct GitUrlBuilder { } impl GitUrlBuilder { - fn init(url: &str) -> Result { + fn prebuild_check(&self) -> Result<(), String> { #[cfg(feature = "tracing")] debug!("Processing: {:?}", &url); // Error if there are null bytes within the url // https://github.com/tjtelan/git-url-parse-rs/issues/16 - if url.contains('\0') { - return Err(GitUrlParseError::FoundNullBytes); + //if let Some(Some(host)) = &self.host { + // if host.contains('\0') { + // return Err(GitUrlParseError::FoundNullBytes.to_string()); + // } + + // if host.is_empty() { + // return Err( + // GitUrlParseError::UnexpectedEmptyValue(String::from("host")).to_string() + // ); + // } + //} + + //if let Some(Some(user)) = &self.user { + // if user.contains('\0') { + // return Err(GitUrlParseError::FoundNullBytes.to_string()); + // } + + // if user.is_empty() { + // return Err( + // GitUrlParseError::UnexpectedEmptyValue(String::from("user")).to_string() + // ); + // } + //} + + //if let Some(Some(token)) = &self.token { + // if token.contains('\0') { + // return Err(GitUrlParseError::FoundNullBytes.to_string()); + // } + + // if token.is_empty() { + // return Err( + // GitUrlParseError::UnexpectedEmptyValue(String::from("token")).to_string(), + // ); + // } + //} + + //if let Some(path) = &self.path { + // if path.contains('\0') 
{ + // return Err(GitUrlParseError::FoundNullBytes.to_string()); + // } + // if path.is_empty() { + // return Err( + // GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() + // ); + // } + //} + + Ok(()) + } + + fn parse(url: &str) -> Result { + println!("start: {url}"); + let mut giturl = GitUrlBuilder::default(); + let mut working_url = url; + let mut hint = GitUrlParseHint::default(); + + //working_url = giturl.parse_scheme(&working_url); + giturl.parse_scheme(&mut working_url, &mut hint); + //working_url = giturl.parse_auth_info(&working_url); + giturl.parse_auth_info(&mut working_url, &mut hint); + let save_state = working_url.clone(); + + //working_url = giturl.parse_host_port(&working_url); + giturl.parse_host_port(&mut working_url, &mut hint); + + match hint { + GitUrlParseHint::Httplike => {} + GitUrlParseHint::Sshlike => { + //working_url = giturl.parse_ssh_path(&working_url); + giturl.parse_ssh_path(&mut working_url, &mut hint); + } + GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { + working_url = save_state; + giturl.host(None); + //giturl.host = None; + giturl.scheme(Scheme::File); + //giturl.scheme = Some(Scheme::File); + } } - Ok(GitUrlBuilder::default()) + + //(giturl, _) = parse_path(giturl, working_url)?; + giturl.parse_path(&mut working_url, &mut hint); + + println!(""); + Ok(giturl) } - //fn parse_scheme(&mut self, input: &str) -> Result + fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + + if let Ok((leftover, Some(s))) = GitUrlBuilder::_parse_scheme(working_url) { + println!("leftover: {leftover}, scheme: {s:?}"); + + let scheme = Scheme::from_str(s).expect("Unknown scheme"); + + // todo: pass in hint! + *hint = match &scheme { + Scheme::Ssh => GitUrlParseHint::Sshlike, + Scheme::File => GitUrlParseHint::Filelike, + _ => GitUrlParseHint::Httplike, + }; - // should I validate that there are values, not empty strings? 
- fn build(&self) -> GitUrl { - let mut git_url = GitUrl::default(); + builder.scheme(Some(scheme)); + //builder.scheme = Some(scheme); + builder.print_scheme(true); + //builder.print_scheme = true; + //working_url = leftover; - if let Some(scheme) = self.scheme.clone() { - git_url.scheme = Some(scheme); - git_url.print_scheme = self.print_scheme; + *self = builder; + *working_url = leftover; } - if let Some(user) = self.user.clone() { - git_url.user = Some(user.clone()); + //else { + // working_url + //} + } + + fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + if let Ok((leftover, Some(username))) = GitUrlBuilder::_parse_username(working_url) { + println!("leftover: {leftover}, username: {username:?}"); + //builder.user = Some(username.to_string()); + builder.user(Some(username.to_string())); + + //working_url = leftover; + + if *hint == GitUrlParseHint::Unknown { + *hint = GitUrlParseHint::Sshlike; + } + + if let Ok((token, Some(real_username))) = GitUrlBuilder::_parse_token(username) { + println!("token: {token}, real_username: {real_username:?}"); + //builder.user = Some(real_username.to_string()); + builder.user(Some(real_username.to_string())); + //builder.token = Some(token.to_string()); + builder.token(Some(token.to_string())); + + if *hint == GitUrlParseHint::Unknown || *hint == GitUrlParseHint::Sshlike { + *hint = GitUrlParseHint::Httplike; + } + } + + *working_url = leftover; + *self = builder; + //(builder, leftover) } - if let Some(token) = self.token.clone() { - git_url.token = Some(token.clone()); + //else { + // (builder, working_url) + //} + } + + fn parse_host_port(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + let mut save = working_url.clone(); + + if let Ok((leftover, Some(hostname))) = GitUrlBuilder::_parse_hostname(save) { + println!("leftover {leftover}, hostname: {hostname}"); + //builder.host = 
Some(hostname.to_string()); + builder.host(Some(hostname.to_string())); + //working_url = leftover; + save = leftover; } - if let Some(host) = self.host.clone() { - git_url.host = Some(host.clone()); + + if let Ok((leftover, Some(port))) = GitUrlBuilder::_parse_port(save) { + if !port.is_empty() { + println!("leftover {leftover}, port: {port}"); + //builder.port = Some(u16::from_str(port).expect("Not a valid port")); + builder.port(Some(u16::from_str(port).expect("Not a valid port"))); + //working_url = leftover; + save = leftover; + + if *hint == GitUrlParseHint::Unknown { + *hint = GitUrlParseHint::Httplike; + } + } } - if let Some(port) = self.port { - git_url.port = Some(port); + + *self = builder; + *working_url = save; + + //(builder, save) + } + + fn parse_ssh_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports + //if let Some(ssh_path) = working_url.strip_prefix(":") { + //} + + if let Ok((_leftover, Some(path))) = GitUrlBuilder::_parse_ssh_path(working_url) { + //working_url = path; + // This is important for printing the url correctly with the ":" + //builder.scheme = Some(Scheme::Ssh); + builder.scheme(Some(Scheme::Ssh)); + + *self = builder; + *working_url = path; + //(builder, path) } - git_url.path = self.path.clone(); + //else { + // (builder, working_url) + //} + } + + fn parse_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + if let Ok((leftover, path)) = GitUrlBuilder::_parse_path(working_url) { + println!("leftover {leftover}, path: {path}"); + //if path.is_empty() { + // return Err(GitUrlParseError::UnexpectedEmptyValue(String::from("path"))); + //} - git_url + //builder.path = path.to_string(); + builder.path(path.to_string()); + + *self = builder; + *working_url = leftover; + //Ok((builder.clone(), leftover)) + } + //else { + // 
Ok((builder.clone(), working_url)) + //} } - fn scheme(input: &str) -> IResult<&str, Option<&str>> { + //// + + fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { opt(terminated( alt(( tag(Scheme::File.to_string().as_bytes()), @@ -122,29 +316,29 @@ impl GitUrlBuilder { .parse(input) } - fn username(input: &str) -> IResult<&str, Option<&str>> { + fn _parse_username(input: &str) -> IResult<&str, Option<&str>> { opt(terminated(take_until("@"), tag("@"))).parse(input) } - fn token(input: &str) -> IResult<&str, Option<&str>> { + fn _parse_token(input: &str) -> IResult<&str, Option<&str>> { opt(terminated(take_until(":"), tag(":"))).parse(input) } - fn hostname(input: &str) -> IResult<&str, Option<&str>> { + fn _parse_hostname(input: &str) -> IResult<&str, Option<&str>> { opt(take_till(|c| c == '/' || c == ':')).parse(input) } - fn port(input: &str) -> IResult<&str, Option<&str>> { + fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) } // This is making an assumption that the path is relative, not absolute // This is bc we do not support absolute paths when we also have a port - fn ssh_path(input: &str) -> IResult<&str, Option<&str>> { + fn _parse_ssh_path(input: &str) -> IResult<&str, Option<&str>> { opt(preceded(one_of("/:"), rest)).parse(input) } - fn path(input: &str) -> IResult<&str, &str> { + fn _parse_path(input: &str) -> IResult<&str, &str> { rest(input) } } @@ -153,10 +347,12 @@ impl GitUrlBuilder { /// Internally during parsing the url is sanitized and uses the `url` crate to perform /// the majority of the parsing effort, and with some extra handling to expose /// metadata used my many git hosting services -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Builder)] +#[builder(build_fn(validate = "Self::prebuild_check"))] pub struct GitUrl { /// The fully qualified domain name (FQDN) or IP of the repo - pub host: Option, + 
#[builder(setter(into), default)] + host: Option, ///// The name of the repo //pub name: String, ///// The owner/account/project name @@ -166,20 +362,26 @@ pub struct GitUrl { ///// The full name of the repo, formatted as "owner/name" //pub fullname: String, ///// The git url scheme - pub scheme: Option, + #[builder(setter(into), default)] + scheme: Option, /// The authentication user - pub user: Option, + #[builder(setter(into), default)] + user: Option, /// The oauth token (could appear in the https urls) - pub token: Option, + #[builder(setter(into), default)] + token: Option, /// The non-conventional port where git service is hosted - pub port: Option, + #[builder(setter(into), default)] + port: Option, /// The path to repo w/ respect to user + hostname - pub path: String, + #[builder(setter(into))] + path: String, ///// Indicate if url uses the .git suffix //pub git_suffix: bool, ///// Indicate if url explicitly uses its scheme //pub scheme_prefix: bool, - pub print_scheme: bool, + #[builder(default)] + print_scheme: bool, } /// Build the printable GitUrl from its components @@ -231,16 +433,16 @@ impl fmt::Display for GitUrl { None => String::new(), }; - //let path = match &self.scheme { - // Scheme::Ssh => { - // if self.port.is_some() { - // format!("/{}", &self.path) - // } else { - // format!(":{}", &self.path) - // } - // } - // _ => self.path.to_string(), - //}; + ////let path = match &self.scheme { + //// Scheme::Ssh => { + //// if self.port.is_some() { + //// format!("/{}", &self.path) + //// } else { + //// format!(":{}", &self.path) + //// } + //// } + //// _ => self.path.to_string(), + ////}; let path = if self.scheme == Some(Scheme::Ssh) { if self.port.is_some() { @@ -283,7 +485,8 @@ impl Default for GitUrl { } impl FromStr for GitUrl { - type Err = GitUrlParseError; + //type Err = GitUrlParseError; + type Err = GitUrlBuilderError; fn from_str(s: &str) -> Result { GitUrl::parse(s) @@ -301,34 +504,34 @@ impl GitUrl { } /// Returns a `Result` after 
normalizing and parsing `url` for metadata - pub fn parse(url: &str) -> Result { + pub fn parse(url: &str) -> Result { println!("start: {url}"); - let mut giturl = GitUrlBuilder::init(url)?; - let mut working_url = url; + let mut giturl = GitUrlBuilder::parse(url).unwrap(); + //let mut working_url = url; - (giturl, working_url) = parse_scheme(giturl, &working_url); - (giturl, working_url) = parse_auth_info(giturl, working_url); + //(giturl, working_url) = parse_scheme(giturl, &working_url); + //(giturl, working_url) = parse_auth_info(giturl, working_url); - let save_state = working_url; + //let save_state = working_url; - (giturl, working_url) = parse_host_port(giturl, working_url); + //(giturl, working_url) = parse_host_port(giturl, working_url); - match giturl.hint { - GitUrlParseHint::Httplike => {} - GitUrlParseHint::Sshlike => { - (giturl, working_url) = parse_ssh_path(giturl, working_url); - } - GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { - working_url = save_state; - giturl.host = None; - giturl.scheme = Some(Scheme::File); - } - } + //match giturl.hint { + // GitUrlParseHint::Httplike => {} + // GitUrlParseHint::Sshlike => { + // (giturl, working_url) = parse_ssh_path(giturl, working_url); + // } + // GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { + // working_url = save_state; + // giturl.host = None; + // giturl.scheme = Some(Scheme::File); + // } + //} - (giturl, _) = parse_path(giturl, working_url)?; + //(giturl, _) = parse_path(giturl, working_url)?; - println!(""); - Ok(giturl.build()) + //println!(""); + giturl.build() //// Normalize the url so we can use Url crate to process ssh urls //let normalized = normalize_url(url)?; @@ -498,167 +701,174 @@ impl GitUrl { } } -fn parse_scheme<'a>(giturl:GitUrlBuilder, working_url: &'a str) -> (GitUrlBuilder, &'a str) { - let mut builder = giturl.clone(); - - if let Ok((leftover, Some(s))) = GitUrlBuilder::scheme(working_url) { - println!("leftover: {leftover}, scheme: {s:?}"); - - let 
scheme = Scheme::from_str(s).expect("Unknown scheme"); - - builder.hint = match &scheme { - Scheme::Ssh => GitUrlParseHint::Sshlike, - Scheme::File => GitUrlParseHint::Filelike, - _ => GitUrlParseHint::Httplike, - }; - - builder.scheme = Some(scheme); - builder.print_scheme = true; - //working_url = leftover; - - (builder, leftover) - } else { - (builder, working_url) - } -} - -fn parse_auth_info<'a>( - giturl: GitUrlBuilder, - working_url: &'a str, -) -> (GitUrlBuilder, &'a str) { - - let mut builder = giturl.clone(); - if let Ok((leftover, Some(username))) = GitUrlBuilder::username(working_url) { - println!("leftover: {leftover}, username: {username:?}"); - builder.user = Some(username.to_string()); - - //working_url = leftover; - - if builder.hint == GitUrlParseHint::Unknown { - builder.hint = GitUrlParseHint::Sshlike; - } - - if let Ok((token, Some(real_username))) = GitUrlBuilder::token(username) { - println!("token: {token}, real_username: {real_username:?}"); - builder.user = Some(real_username.to_string()); - builder.token = Some(token.to_string()); - - if builder.hint == GitUrlParseHint::Unknown || builder.hint == GitUrlParseHint::Sshlike { - builder.hint = GitUrlParseHint::Httplike; - } - } - - (builder, leftover) - } else { - (builder, working_url) - } -} -fn parse_host_port<'a>( - giturl: GitUrlBuilder, - working_url: &'a str, -) -> (GitUrlBuilder, &'a str) { - let mut builder = giturl.clone(); - let mut save = working_url; - - if let Ok((leftover, Some(hostname))) = GitUrlBuilder::hostname(working_url) { - println!("leftover {leftover}, hostname: {hostname}"); - builder.host = Some(hostname.to_string()); - //working_url = leftover; - save = leftover; - } - - if let Ok((leftover, Some(port))) = GitUrlBuilder::port(save) { - if !port.is_empty() { - println!("leftover {leftover}, port: {port}"); - builder.port = Some(u16::from_str(port).expect("Not a valid port")); - //working_url = leftover; - save = leftover; - - if builder.hint == 
GitUrlParseHint::Unknown { - builder.hint = GitUrlParseHint::Httplike; - } - } - } - - (builder, save) -} +// start of old + +//fn parse_scheme<'a>(giturl: GitUrlBuilderOld, working_url: &'a str) -> (GitUrlBuilderOld, &'a str) { +// let mut builder = giturl.clone(); +// +// if let Ok((leftover, Some(s))) = GitUrlBuilderOld::scheme(working_url) { +// println!("leftover: {leftover}, scheme: {s:?}"); +// +// let scheme = Scheme::from_str(s).expect("Unknown scheme"); +// +// builder.hint = match &scheme { +// Scheme::Ssh => GitUrlParseHint::Sshlike, +// Scheme::File => GitUrlParseHint::Filelike, +// _ => GitUrlParseHint::Httplike, +// }; +// +// builder.scheme = Some(scheme); +// builder.print_scheme = true; +// //working_url = leftover; +// +// (builder, leftover) +// } else { +// (builder, working_url) +// } +//} +// +//fn parse_auth_info<'a>( +// giturl: GitUrlBuilderOld, +// working_url: &'a str, +//) -> (GitUrlBuilderOld, &'a str) { +// let mut builder = giturl.clone(); +// if let Ok((leftover, Some(username))) = GitUrlBuilderOld::username(working_url) { +// println!("leftover: {leftover}, username: {username:?}"); +// builder.user = Some(username.to_string()); +// +// //working_url = leftover; +// +// if builder.hint == GitUrlParseHint::Unknown { +// builder.hint = GitUrlParseHint::Sshlike; +// } +// +// if let Ok((token, Some(real_username))) = GitUrlBuilderOld::token(username) { +// println!("token: {token}, real_username: {real_username:?}"); +// builder.user = Some(real_username.to_string()); +// builder.token = Some(token.to_string()); +// +// if builder.hint == GitUrlParseHint::Unknown || builder.hint == GitUrlParseHint::Sshlike +// { +// builder.hint = GitUrlParseHint::Httplike; +// } +// } +// +// (builder, leftover) +// } else { +// (builder, working_url) +// } +//} +//fn parse_host_port<'a>( +// giturl: GitUrlBuilderOld, +// working_url: &'a str, +//) -> (GitUrlBuilderOld, &'a str) { +// let mut builder = giturl.clone(); +// let mut save = working_url; 
+// +// if let Ok((leftover, Some(hostname))) = GitUrlBuilderOld::hostname(working_url) { +// println!("leftover {leftover}, hostname: {hostname}"); +// builder.host = Some(hostname.to_string()); +// //working_url = leftover; +// save = leftover; +// } +// +// if let Ok((leftover, Some(port))) = GitUrlBuilderOld::port(save) { +// if !port.is_empty() { +// println!("leftover {leftover}, port: {port}"); +// builder.port = Some(u16::from_str(port).expect("Not a valid port")); +// //working_url = leftover; +// save = leftover; +// +// if builder.hint == GitUrlParseHint::Unknown { +// builder.hint = GitUrlParseHint::Httplike; +// } +// } +// } +// +// (builder, save) +//} +// +//fn parse_ssh_path<'a>( +// giturl: GitUrlBuilderOld, +// working_url: &'a str, +//) -> (GitUrlBuilderOld, &'a str) { +// let mut builder = giturl.clone(); +// // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports +// //if let Some(ssh_path) = working_url.strip_prefix(":") { +// //} +// +// if let Ok((_leftover, Some(path))) = GitUrlBuilderOld::ssh_path(working_url) { +// //working_url = path; +// // This is important for printing the url correctly with the ":" +// builder.scheme = Some(Scheme::Ssh); +// (builder, path) +// } else { +// (builder, working_url) +// } +//} +// +//fn parse_path<'a>( +// giturl: GitUrlBuilderOld, +// working_url: &'a str, +//) -> Result<(GitUrlBuilderOld, &'a str), GitUrlParseError> { +// let mut builder = giturl.clone(); +// if let Ok((leftover, path)) = GitUrlBuilderOld::path(working_url) { +// println!("leftover {leftover}, path: {path}"); +// if path.is_empty() { +// return Err(GitUrlParseError::UnexpectedEmptyValue(String::from("path"))); +// } +// +// builder.path = path.to_string(); +// Ok((builder.clone(), leftover)) +// } else { +// Ok((builder.clone(), working_url)) +// } +//} + +// end of old -fn parse_ssh_path<'a>(giturl: GitUrlBuilder, working_url: &'a str) -> (GitUrlBuilder, &'a str) { - - let mut builder = 
giturl.clone(); - // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports - //if let Some(ssh_path) = working_url.strip_prefix(":") { - //} - - if let Ok((_leftover, Some(path))) = GitUrlBuilder::ssh_path(working_url) { - //working_url = path; - // This is important for printing the url correctly with the ":" - builder.scheme = Some(Scheme::Ssh); - (builder, path) - } else { - (builder, working_url) - } -} - -fn parse_path<'a>( - giturl: GitUrlBuilder, - working_url: &'a str, -) -> Result<(GitUrlBuilder, &'a str), GitUrlParseError> { - let mut builder = giturl.clone(); - if let Ok((leftover, path)) = GitUrlBuilder::path(working_url) { - println!("leftover {leftover}, path: {path}"); - if path.is_empty() { - return Err(GitUrlParseError::EmptyPath); - } - - builder.path = path.to_string(); - Ok((builder.clone(), leftover)) - } else { - Ok((builder.clone(), working_url)) - } -} /// `normalize_ssh_url` takes in an ssh url that separates the login info /// from the path into with a `:` and replaces it with `/`. 
/// /// Prepends `ssh://` to url /// /// Supports absolute and relative paths -fn normalize_ssh_url(url: &str) -> Result { - let u = url.split(':').collect::>(); - - match u.len() { - 2 => { - #[cfg(feature = "tracing")] - debug!("Normalizing ssh url: {:?}", u); - normalize_url(&format!("ssh://{}/{}", u[0], u[1])) - } - 3 => { - #[cfg(feature = "tracing")] - debug!("Normalizing ssh url with ports: {:?}", u); - normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2])) - } - _default => Err(GitUrlParseError::UnsupportedSshUrlFormat), - } -} +//fn normalize_ssh_url(url: &str) -> Result { +// let u = url.split(':').collect::>(); +// +// match u.len() { +// 2 => { +// #[cfg(feature = "tracing")] +// debug!("Normalizing ssh url: {:?}", u); +// normalize_url(&format!("ssh://{}/{}", u[0], u[1])) +// } +// 3 => { +// #[cfg(feature = "tracing")] +// debug!("Normalizing ssh url with ports: {:?}", u); +// normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2])) +// } +// _default => Err(GitUrlParseError::UnsupportedSshUrlFormat), +// } +//} /// `normalize_file_path` takes in a filepath and uses `Url::from_file_path()` to parse /// /// Prepends `file://` to url -#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] -fn normalize_file_path(filepath: &str) -> Result { - let fp = Url::from_file_path(filepath); - - match fp { - Ok(path) => Ok(path), - Err(_e) => { - if let Ok(file_url) = normalize_url(&format!("file://{}", filepath)) { - Ok(file_url) - } else { - Err(GitUrlParseError::FileUrlNormalizeFailedSchemeAdded) - } - } - } -} +//#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] +//fn normalize_file_path(filepath: &str) -> Result { +// let fp = Url::from_file_path(filepath); +// +// match fp { +// Ok(path) => Ok(path), +// Err(_e) => { +// if let Ok(file_url) = normalize_url(&format!("file://{}", filepath)) { +// Ok(file_url) +// } else { +// Err(GitUrlParseError::FileUrlNormalizeFailedSchemeAdded) +// } +// } +// } +//} #[cfg(target_arch 
= "wasm32")] fn normalize_file_path(_filepath: &str) -> Result { @@ -668,146 +878,151 @@ fn normalize_file_path(_filepath: &str) -> Result { /// `normalize_url` takes in url as `&str` and takes an opinionated approach to identify /// `ssh://` or `file://` urls that require more information to be added so that /// they can be parsed more effectively by `url::Url::parse()` -pub fn normalize_url(url: &str) -> Result { - #[cfg(feature = "tracing")] - debug!("Processing: {:?}", &url); - - // TODO: Should this be extended to check for any whitespace? - // Error if there are null bytes within the url - // https://github.com/tjtelan/git-url-parse-rs/issues/16 - if url.contains('\0') { - return Err(GitUrlParseError::FoundNullBytes); - } - - // We're going to remove any trailing slash before running through Url::parse - let trim_url = url.trim_end_matches('/'); - - // TODO: Remove support for this form when I go to next major version. - // I forget what it supports, and it isn't obvious after searching for examples - // normalize short git url notation: git:host/path - let url_to_parse = if trim_url.starts_with("git:") && !trim_url.starts_with("git://") { - trim_url.replace("git:", "git://") - } else { - trim_url.to_string() - }; - - let url_parse = Url::parse(&url_to_parse); - - Ok(match url_parse { - Ok(u) => { - match Scheme::from_str(u.scheme()) { - Ok(_p) => u, - Err(_e) => { - // Catch case when an ssh url is given w/o a user - #[cfg(feature = "tracing")] - debug!("Scheme parse fail. 
Assuming a userless ssh url"); - if let Ok(ssh_url) = normalize_ssh_url(trim_url) { - ssh_url - } else { - return Err(GitUrlParseError::SshUrlNormalizeFailedNoScheme); - } - } - } - } - - // If we're here, we're only looking for Scheme::Ssh or Scheme::File - // TODO: Add test for this - Err(url::ParseError::RelativeUrlWithoutBase) => { - // Assuming we have found Scheme::Ssh if we can find an "@" before ":" - // Otherwise we have Scheme::File - //let re = Regex::new(r"^\S+(@)\S+(:).*$").with_context(|| { - // "Failed to build ssh git url regex for testing against url".to_string() - //})?; - - match is_ssh_url(trim_url) { - true => { - #[cfg(feature = "tracing")] - debug!("Scheme::SSH match for normalization"); - normalize_ssh_url(trim_url)? - } - false => { - #[cfg(feature = "tracing")] - debug!("Scheme::File match for normalization"); - normalize_file_path(trim_url)? - } - } - } - Err(err) => { - return Err(GitUrlParseError::from(err)); - } - }) -} +//pub fn normalize_url(url: &str) -> Result { +// #[cfg(feature = "tracing")] +// debug!("Processing: {:?}", &url); +// +// // TODO: Should this be extended to check for any whitespace? +// // Error if there are null bytes within the url +// // https://github.com/tjtelan/git-url-parse-rs/issues/16 +// if url.contains('\0') { +// return Err(GitUrlParseError::FoundNullBytes); +// } +// +// // We're going to remove any trailing slash before running through Url::parse +// let trim_url = url.trim_end_matches('/'); +// +// // TODO: Remove support for this form when I go to next major version. 
+// // I forget what it supports, and it isn't obvious after searching for examples +// // normalize short git url notation: git:host/path +// let url_to_parse = if trim_url.starts_with("git:") && !trim_url.starts_with("git://") { +// trim_url.replace("git:", "git://") +// } else { +// trim_url.to_string() +// }; +// +// let url_parse = Url::parse(&url_to_parse); +// +// Ok(match url_parse { +// Ok(u) => { +// match Scheme::from_str(u.scheme()) { +// Ok(_p) => u, +// Err(_e) => { +// // Catch case when an ssh url is given w/o a user +// #[cfg(feature = "tracing")] +// debug!("Scheme parse fail. Assuming a userless ssh url"); +// if let Ok(ssh_url) = normalize_ssh_url(trim_url) { +// ssh_url +// } else { +// return Err(GitUrlParseError::SshUrlNormalizeFailedNoScheme); +// } +// } +// } +// } +// +// // If we're here, we're only looking for Scheme::Ssh or Scheme::File +// // TODO: Add test for this +// Err(url::ParseError::RelativeUrlWithoutBase) => { +// // Assuming we have found Scheme::Ssh if we can find an "@" before ":" +// // Otherwise we have Scheme::File +// //let re = Regex::new(r"^\S+(@)\S+(:).*$").with_context(|| { +// // "Failed to build ssh git url regex for testing against url".to_string() +// //})?; +// +// match is_ssh_url(trim_url) { +// true => { +// #[cfg(feature = "tracing")] +// debug!("Scheme::SSH match for normalization"); +// normalize_ssh_url(trim_url)? +// } +// false => { +// #[cfg(feature = "tracing")] +// debug!("Scheme::File match for normalization"); +// normalize_file_path(trim_url)? +// } +// } +// } +// Err(err) => { +// return Err(GitUrlParseError::from(err)); +// } +// }) +//} // Valid ssh `url` for cloning have a usernames, // but we don't require it classification or parsing purposes // However a path must be specified with a `:` -fn is_ssh_url(url: &str) -> bool { - // if we do not have a path - if !url.contains(':') { - return false; - } - - // if we have a username, expect it before the path (Are usernames with colons valid?) 
- if let (Some(at_pos), Some(colon_pos)) = (url.find('@'), url.find(':')) { - if colon_pos < at_pos { - return false; - } - - // Make sure we provided a username, and not just `@` - let parts: Vec<&str> = url.split('@').collect(); - return parts.len() == 2 || parts[0].is_empty(); - } - - // it's an ssh url if we have a domain:path pattern - let parts: Vec<&str> = url.split(':').collect(); - - // FIXME: I am not sure how to validate a url with a port - //if parts.len() != 3 && !parts[0].is_empty() && !parts[1].is_empty() && !parts[2].is_empty() { - // return false; - //} - - // This should also handle if a port is specified - // no port example: ssh://user@domain:path/to/repo.git - // port example: ssh://user@domain:port/path/to/repo.git - parts.len() == 2 && parts[0].is_empty() && parts[1].is_empty() -} +//fn is_ssh_url(url: &str) -> bool { +// // if we do not have a path +// if !url.contains(':') { +// return false; +// } +// +// // if we have a username, expect it before the path (Are usernames with colons valid?) 
+// if let (Some(at_pos), Some(colon_pos)) = (url.find('@'), url.find(':')) { +// if colon_pos < at_pos { +// return false; +// } +// +// // Make sure we provided a username, and not just `@` +// let parts: Vec<&str> = url.split('@').collect(); +// return parts.len() == 2 || parts[0].is_empty(); +// } +// +// // it's an ssh url if we have a domain:path pattern +// let parts: Vec<&str> = url.split(':').collect(); +// +// // FIXME: I am not sure how to validate a url with a port +// //if parts.len() != 3 && !parts[0].is_empty() && !parts[1].is_empty() && !parts[2].is_empty() { +// // return false; +// //} +// +// // This should also handle if a port is specified +// // no port example: ssh://user@domain:path/to/repo.git +// // port example: ssh://user@domain:port/path/to/repo.git +// parts.len() == 2 && parts[0].is_empty() && parts[1].is_empty() +//} #[derive(Error, Debug, PartialEq, Eq)] pub enum GitUrlParseError { - #[error("Error from Url crate: {0}")] - UrlParseError(#[from] url::ParseError), + //#[error("Error from derive_builder")] + //DeriveBuilderError(#[from] derive_builder::UninitializedFieldError), + + //#[error("Error from Url crate: {0}")] + //UrlParseError(#[from] url::ParseError), - #[error("No url scheme was found, then failed to normalize as ssh url.")] - SshUrlNormalizeFailedNoScheme, + //#[error("No url scheme was found, then failed to normalize as ssh url.")] + //SshUrlNormalizeFailedNoScheme, - #[error("No url scheme was found, then failed to normalize as ssh url after adding 'ssh://'")] - SshUrlNormalizeFailedSchemeAdded, + //#[error("No url scheme was found, then failed to normalize as ssh url after adding 'ssh://'")] + //SshUrlNormalizeFailedSchemeAdded, - #[error("Failed to normalize as ssh url after adding 'ssh://'")] - SshUrlNormalizeFailedSchemeAddedWithPorts, + //#[error("Failed to normalize as ssh url after adding 'ssh://'")] + //SshUrlNormalizeFailedSchemeAddedWithPorts, - #[error("No url scheme was found, then failed to normalize as 
file url.")] - FileUrlNormalizeFailedNoScheme, + //#[error("No url scheme was found, then failed to normalize as file url.")] + //FileUrlNormalizeFailedNoScheme, - #[error("No url scheme was found, then failed to normalize as file url after adding 'file://'")] - FileUrlNormalizeFailedSchemeAdded, + //#[error("No url scheme was found, then failed to normalize as file url after adding 'file://'")] + //FileUrlNormalizeFailedSchemeAdded, - #[error("Git Url not in expected format")] - UnexpectedFormat, + //#[error("Git Url not in expected format")] + //UnexpectedFormat, // FIXME: Keep an eye on this error for removal #[error("Git Url for host using unexpected scheme")] UnexpectedScheme, - #[error("Scheme unsupported: {0}")] - UnsupportedScheme(String), - #[error("Host from Url cannot be str or does not exist")] - UnsupportedUrlHostFormat, - #[error("Git Url not in expected format for SSH")] - UnsupportedSshUrlFormat, - #[error("Normalized URL has no path")] - EmptyPath, - + //#[error("Scheme unsupported: {0}")] + //UnsupportedScheme(String), + //#[error("Host from Url cannot be str or does not exist")] + //UnsupportedUrlHostFormat, + //#[error("Git Url not in expected format for SSH")] + //UnsupportedSshUrlFormat, + //#[error("Normalized URL has no path")] + //EmptyPath, #[error("Found null bytes within input url before parsing")] FoundNullBytes, + + #[error("Value expected for field: {0}")] + UnexpectedEmptyValue(String), } diff --git a/tests/mod.rs b/tests/mod.rs index b693062..b265223 100644 --- a/tests/mod.rs +++ b/tests/mod.rs @@ -1,3 +1,3 @@ -mod normalize; +//mod normalize; mod parse; mod trim_auth; diff --git a/tests/normalize.rs b/tests/normalize.rs index ea1b174..230dafb 100644 --- a/tests/normalize.rs +++ b/tests/normalize.rs @@ -1,185 +1,186 @@ -use git_url_parse::*; - -// Url Normalization -#[test] -fn git() { - let test_url = "git://host.tld/user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - 
assert_eq!(normalized.as_str(), "git://host.tld/user/project-name.git"); -} - -// I'm not even sure if this is a form that should be supported bc I can't find examples of it being used in the wild by another service +//use git_url_parse::*; +// +//// Url Normalization +//#[test] +//fn git() { +// let test_url = "git://host.tld/user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "git://host.tld/user/project-name.git"); +//} +// +//// I'm not even sure if this is a form that should be supported bc I can't find examples of it being used in the wild by another service +////#[should_panic] +//#[test] +//fn git2() { +// let test_url = "git:host.tld/user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "git://host.tld/user/project-name.git"); +//} +// +//#[test] +//fn http() { +// let test_url = "http://host.tld/user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "http://host.tld/user/project-name.git"); +//} +// +//#[test] +//fn https() { +// let test_url = "https://host.tld/user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!( +// normalized.as_str(), +// "https://host.tld/user/project-name.git" +// ); +//} +// +//#[test] +//fn ssh_scheme() { +// let test_url = "ssh://git@host.tld/user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!( +// normalized.as_str(), +// "ssh://git@host.tld/user/project-name.git" +// ); +//} +// +//#[test] +//fn ssh_no_scheme() { +// let test_url = "git@host.tld:user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!( +// normalized.as_str(), +// 
"ssh://git@host.tld/user/project-name.git" +// ); +//} +// +//#[test] +//fn ssh_no_scheme_no_user() { +// let test_url = "host.tld:user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "ssh://host.tld/user/project-name.git"); +//} +// +//#[test] +//fn unix_file_scheme_abs_path() { +// let test_url = "file:///user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "file:///user/project-name.git"); +//} +// +//#[test] +//fn unix_file_no_scheme_abs_path() { +// let test_url = "/user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "file:///user/project-name.git"); +//} +// +//#[test] +//fn unix_file_scheme_rel_path() { +// let test_url = "file://../user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "file://../user/project-name.git"); +//} +// +//#[test] +//fn unix_file_no_scheme_rel_path() { +// let test_url = "../user/project-name.git"; +// let normalized = normalize_url(test_url).expect("Normalizing url failed"); +// +// assert_eq!(normalized.as_str(), "file://../user/project-name.git"); +//} +// //#[should_panic] -#[test] -fn git2() { - let test_url = "git:host.tld/user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "git://host.tld/user/project-name.git"); -} - -#[test] -fn http() { - let test_url = "http://host.tld/user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "http://host.tld/user/project-name.git"); -} - -#[test] -fn https() { - let test_url = "https://host.tld/user/project-name.git"; - let normalized = 
normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!( - normalized.as_str(), - "https://host.tld/user/project-name.git" - ); -} - -#[test] -fn ssh_scheme() { - let test_url = "ssh://git@host.tld/user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!( - normalized.as_str(), - "ssh://git@host.tld/user/project-name.git" - ); -} - -#[test] -fn ssh_no_scheme() { - let test_url = "git@host.tld:user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!( - normalized.as_str(), - "ssh://git@host.tld/user/project-name.git" - ); -} - -#[test] -fn ssh_no_scheme_no_user() { - let test_url = "host.tld:user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "ssh://host.tld/user/project-name.git"); -} - -#[test] -fn unix_file_scheme_abs_path() { - let test_url = "file:///user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "file:///user/project-name.git"); -} - -#[test] -fn unix_file_no_scheme_abs_path() { - let test_url = "/user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "file:///user/project-name.git"); -} - -#[test] -fn unix_file_scheme_rel_path() { - let test_url = "file://../user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -} - -#[test] -fn unix_file_no_scheme_rel_path() { - let test_url = "../user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -} - -#[should_panic] -#[test] -fn win_file_scheme_abs_path() { - let test_url = 
"file://c:\\user\\project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - // I actually don't know how this should be normalized. - assert_eq!(normalized.as_str(), "file://c:\\user\\project-name.git"); -} - -#[should_panic] -#[test] -fn win_file_no_scheme_abs_path() { - let test_url = "c:\\user\\project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - // I actually don't know how this should be normalized. - assert_eq!(normalized.as_str(), "file://c:\\user\\project-name.git"); -} - -#[test] -fn win_file_scheme_rel_path() { - let test_url = "file://..\\user\\project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - // I actually don't know how this should be normalized. - assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -} - -#[test] -fn win_file_no_scheme_rel_path() { - let test_url = "..\\user\\project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - // I actually don't know how this should be normalized. 
- assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -} -#[test] -fn multi_git_ssh() { - let test_url = "git+ssh://host.tld/user/project-name.git"; - let normalized = normalize_url(test_url).expect("Normalizing url failed"); - - assert_eq!( - normalized.as_str(), - "git+ssh://host.tld/user/project-name.git" - ); -} - -// From https://github.com/tjtelan/git-url-parse-rs/issues/16 -#[test] -fn null_in_input1() { - let test_url = "////////ws///////////*,\u{0}\u{0}^\u{0}\u{0}\u{0}\u{0}@2\u{1}\u{0}\u{1d})\u{0}\u{0}\u{0}:\u{0}\u{0}\u{0}"; - let normalized = normalize_url(test_url); - - assert!(normalized.is_err()); -} - -// From https://github.com/tjtelan/git-url-parse-rs/issues/16 -#[test] -fn null_in_input2() { - let test_url = "?\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1f}s\u{3}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{5}\u{1}@\u{0}\u{0}\u{4}!e\u{0}\u{0}2\u{1c}^3106://? "git://" //#[test] -//fn ssh_without_organization() { -// let test_url = "ssh://f589726c3611:29418/repo"; +//fn git() { +// let test_url = "git://github.com/owner/name.git"; // let parsed = GitUrl::parse(test_url).expect("URL parse failed"); // let expected = GitUrl { -// host: Some("f589726c3611".to_string()), -// //name: "repo".to_string(), -// //owner: Some("repo".to_string()), +// host: Some("github.com".to_string()), +// //name: "name".to_string(), +// //owner: Some("owner".to_string()), // //organization: None, -// //fullname: "repo/repo".to_string(), -// scheme: Some(Scheme::Ssh), +// //fullname: "owner/name".to_string(), +// scheme: Some(Scheme::Git), // user: None, // token: None, -// port: Some(29418), -// path: "repo".to_string(), -// //git_suffix: false, +// port: None, +// path: "/owner/name.git".to_string(), +// //git_suffix: true, // //scheme_prefix: true, // print_scheme: true, // }; // // assert_eq!(parsed, expected); //} - -#[test] -fn empty_path() { - assert_eq!( - GitUrlParseError::EmptyPath, - GitUrl::parse("file://").unwrap_err() - ) -} - -#[test] -fn 
bad_port_number() { - let test_url = "https://github.com:crypto-browserify/browserify-rsa.git"; - let e = GitUrl::parse(test_url); - - assert!(e.is_err()); - assert_eq!( - format!("{}", e.err().unwrap()), - "Error from Url crate: invalid port number" - ); -} - -// This test might not have a use anymore if we're not expanding "git:" -> "git://" -#[test] -fn git() { - let test_url = "git://github.com/owner/name.git"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrl { - host: Some("github.com".to_string()), - //name: "name".to_string(), - //owner: Some("owner".to_string()), - //organization: None, - //fullname: "owner/name".to_string(), - scheme: Some(Scheme::Git), - user: None, - token: None, - port: None, - path: "/owner/name.git".to_string(), - //git_suffix: true, - //scheme_prefix: true, - print_scheme: true, - }; - - assert_eq!(parsed, expected); -} +// From 87ca5ff26a393c4d65ca4e51cbb5b70f7ac4a47a Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Sat, 16 Aug 2025 16:29:21 -0700 Subject: [PATCH 06/32] Fixed most of the tests --- tests/parse.rs | 679 ++++++++++++++++++++----------------------------- 1 file changed, 279 insertions(+), 400 deletions(-) diff --git a/tests/parse.rs b/tests/parse.rs index e1cb268..8874d8d 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -3,24 +3,9 @@ use git_url_parse::*; fn ssh_user_ports() { let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - //let expected = GitUrl { - // host: Some("host.tld".to_string()), - // //name: "project-name".to_string(), - // //owner: Some("user".to_string()), - // //organization: None, - // //fullname: "user/project-name".to_string(), - // scheme: Some(Scheme::Ssh), - // user: Some("git".to_string()), - // token: None, - // port: Some(9999), - // path: "user/project-name.git".to_string(), - // //git_suffix: true, - // //scheme_prefix: true, - // print_scheme: true, - //}; let 
expected = GitUrlBuilder::default() - .host(Some(String::from("host.tld"))) .scheme(Some(Scheme::Ssh)) + .host(Some(String::from("host.tld"))) .user(Some(String::from("git"))) .port(Some(9999)) .path(String::from("user/project-name.git")) @@ -31,183 +16,266 @@ fn ssh_user_ports() { assert_eq!(parsed, expected); } -//// Specific service support -//#[test] -//fn https_user_bitbucket() { -// let test_url = "https://user@bitbucket.org/user/repo.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("bitbucket.org".to_string()), -// //name: "repo".to_string(), -// //owner: Some("user".to_string()), -// //organization: None, -// //fullname: "user/repo".to_string(), -// scheme: Some(Scheme::Https), -// user: Some("user".to_string()), -// token: None, -// port: None, -// path: "/user/repo.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn ssh_user_bitbucket() { -// let test_url = "git@bitbucket.org:user/repo.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("bitbucket.org".to_string()), -// //name: "repo".to_string(), -// //owner: Some("user".to_string()), -// //organization: None, -// //fullname: "user/repo".to_string(), -// scheme: Some(Scheme::Ssh), -// user: Some("git".to_string()), -// token: None, -// port: None, -// path: "user/repo.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn https_user_auth_bitbucket() { -// let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("bitbucket.org".to_string()), -// //name: "name".to_string(), -// //owner: Some("owner".to_string()), -// 
//organization: None, -// //fullname: "owner/name".to_string(), -// scheme: Some(Scheme::Https), -// user: Some("x-token-auth".to_string()), -// token: Some("token".to_string()), -// port: None, -// path: "/owner/name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; -// -// assert_eq!(parsed, expected); -//} -// +#[test] +fn ssh_no_scheme_no_user() { + let test_url = "host.tld:user/project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Ssh)) + .host(Some(String::from("host.tld"))) + .path(String::from("user/project-name.git")) + .print_scheme(false) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +// Specific service support +#[test] +fn https_user_bitbucket() { + let test_url = "https://user@bitbucket.org/user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Https)) + .host(Some(String::from("bitbucket.org"))) + .user(Some(String::from("user"))) + .path(String::from("/user/repo.git")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn ssh_user_bitbucket() { + let test_url = "git@bitbucket.org:user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .host(Some(String::from("bitbucket.org"))) + .scheme(Some(Scheme::Ssh)) + .user(Some(String::from("git"))) + .path(String::from("user/repo.git")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn https_user_auth_bitbucket() { + let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Https)) + .host(Some("bitbucket.org".to_string())) + .user(String::from("x-token-auth")) + 
.token(String::from("token")) + .path(String::from("/owner/name.git")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn https_user_github() { + let test_url = "https://user@github.com/user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Https)) + .user(Some(String::from("user"))) + .host(Some(String::from("github.com"))) + .path(String::from("/user/repo.git")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn ssh_user_github() { + let test_url = "git@github.com:user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Ssh)) + .user(Some(String::from("git"))) + .host(Some(String::from("github.com"))) + .path(String::from("user/repo.git")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn https_user_auth_github() { + let test_url = "https://token:x-oauth-basic@github.com/owner/name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Https)) + .user(Some(String::from("token"))) + .token(Some(String::from("x-oauth-basic"))) + .host(Some(String::from("github.com"))) + .path(String::from("/owner/name.git")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn ssh_user_azure_devops() { + let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Ssh)) + .user(Some(String::from("git"))) + .host(Some(String::from("ssh.dev.azure.com"))) + .path(String::from("v3/CompanyName/ProjectName/RepoName")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn https_user_azure_devops() { 
+ let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Https)) + .user(Some(String::from("organization"))) + .host(Some(String::from("dev.azure.com"))) + .path(String::from("/organization/project/_git/repo")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn ftp_user() { + let test_url = "ftp://git@host.tld/user/project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Ftp)) + .user(Some(String::from("git"))) + .host(Some(String::from("host.tld"))) + .path(String::from("/user/project-name.git")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn ftps_user() { + let test_url = "ftps://git@host.tld/user/project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Ftps)) + .user(Some(String::from("git"))) + .host(Some(String::from("host.tld"))) + .path(String::from("/user/project-name.git")) + .print_scheme(true) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn relative_unix_path() { + let test_url = "../project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::File)) + .path(String::from("../project-name.git")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +#[test] +fn absolute_unix_path() { + let test_url = "/path/to/project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::File)) + .path(String::from("/path/to/project-name.git")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +// Issue #6 - 
Relative Windows paths will parse into Unix paths +#[test] +fn relative_windows_path() { + let test_url = "..\\project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::File)) + .path(String::from("../project-name.git")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +// Can I use `typed-path` to deal with this? +// Issue #7 - Absolute Windows paths will not parse at all +#[should_panic(expected = "URL parse failed: UnexpectedFormat")] +#[test] +fn absolute_windows_path() { + let test_url = "c:\\project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::File)) + .path(String::from("c:\\project-name.git")) + .build() + .unwrap(); + + assert_eq!(parsed, expected); +} + +// Move test //#[test] -//fn https_user_github() { -// let test_url = "https://user@github.com/user/repo.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("github.com".to_string()), -// //name: "repo".to_string(), -// //owner: Some("user".to_string()), -// //organization: None, -// //fullname: "user/repo".to_string(), -// scheme: Some(Scheme::Https), -// user: Some("user".to_string()), -// token: None, -// port: None, -// path: "/user/repo.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; +//fn ssh_user_path_not_acctname_reponame_format() { +// let test_url = "git@test.com:repo"; +// let e = GitUrl::parse(test_url); // -// assert_eq!(parsed, expected); +// assert!(e.is_err()); +// assert_eq!( +// format!("{}", e.err().unwrap()), +// "Git Url not in expected format" +// ); //} -// + +// Move test //#[test] -//fn ssh_user_github() { -// let test_url = "git@github.com:user/repo.git"; +//fn ssh_without_organization() { +// let test_url = "ssh://f589726c3611:29418/repo"; // let parsed = 
GitUrl::parse(test_url).expect("URL parse failed"); // let expected = GitUrl { -// host: Some("github.com".to_string()), +// host: Some("f589726c3611".to_string()), // //name: "repo".to_string(), -// //owner: Some("user".to_string()), -// //organization: None, -// //fullname: "user/repo".to_string(), -// scheme: Some(Scheme::Ssh), -// user: Some("git".to_string()), -// token: None, -// port: None, -// path: "user/repo.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn https_user_auth_github() { -// let test_url = "https://token:x-oauth-basic@github.com/owner/name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("github.com".to_string()), -// //name: "name".to_string(), -// //owner: Some("owner".to_string()), +// //owner: Some("repo".to_string()), // //organization: None, -// //fullname: "owner/name".to_string(), -// scheme: Some(Scheme::Https), -// user: Some("token".to_string()), -// token: Some("x-oauth-basic".to_string()), -// port: None, -// path: "/owner/name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn ssh_user_azure_devops() { -// let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("ssh.dev.azure.com".to_string()), -// //name: "RepoName".to_string(), -// //owner: Some("ProjectName".to_string()), -// //organization: Some("CompanyName".to_string()), -// //fullname: "CompanyName/ProjectName/RepoName".to_string(), +// //fullname: "repo/repo".to_string(), // scheme: Some(Scheme::Ssh), -// user: Some("git".to_string()), -// token: None, -// port: None, -// path: "v3/CompanyName/ProjectName/RepoName".to_string(), -// //git_suffix: 
false, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn https_user_azure_devops() { -// let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("dev.azure.com".to_string()), -// //name: "repo".to_string(), -// //owner: Some("project".to_string()), -// //organization: Some("organization".to_string()), -// //fullname: "organization/project/repo".to_string(), -// scheme: Some(Scheme::Https), -// user: Some("organization".to_string()), +// user: None, // token: None, -// port: None, -// path: "/organization/project/_git/repo".to_string(), +// port: Some(29418), +// path: "repo".to_string(), // //git_suffix: false, // //scheme_prefix: true, // print_scheme: true, @@ -215,186 +283,7 @@ fn ssh_user_ports() { // // assert_eq!(parsed, expected); //} -// -//#[test] -//fn ftp_user() { -// let test_url = "ftp://git@host.tld/user/project-name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("host.tld".to_string()), -// //name: "project-name".to_string(), -// //owner: Some("user".to_string()), -// //organization: None, -// //fullname: "user/project-name".to_string(), -// scheme: Some(Scheme::Ftp), -// user: Some("git".to_string()), -// token: None, -// port: None, -// path: "/user/project-name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn ftps_user() { -// let test_url = "ftps://git@host.tld/user/project-name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("host.tld".to_string()), -// //name: "project-name".to_string(), -// //owner: Some("user".to_string()), -// //organization: None, -// //fullname: 
"user/project-name".to_string(), -// scheme: Some(Scheme::Ftps), -// user: Some("git".to_string()), -// token: None, -// port: None, -// path: "/user/project-name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn relative_unix_path() { -// let test_url = "../project-name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: None, -// //name: "project-name".to_string(), -// //owner: None, -// //organization: None, -// //fullname: "project-name".to_string(), -// scheme: Some(Scheme::File), -// user: None, -// token: None, -// port: None, -// path: "../project-name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//#[test] -//fn absolute_unix_path() { -// let test_url = "/path/to/project-name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: None, -// //name: "project-name".to_string(), -// //owner: None, -// //organization: None, -// //fullname: "project-name".to_string(), -// scheme: Some(Scheme::File), -// user: None, -// token: None, -// port: None, -// path: "/path/to/project-name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//// Issue #6 - Relative Windows paths will parse into Unix paths -//#[test] -//fn relative_windows_path() { -// let test_url = "..\\project-name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: None, -// //name: "project-name".to_string(), -// //owner: None, -// //organization: None, -// //fullname: "project-name".to_string(), -// scheme: Some(Scheme::File), -// user: None, -// token: None, -// port: None, -// path: 
"../project-name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//// Can I use `typed-path` to deal with this? -//// Issue #7 - Absolute Windows paths will not parse at all -//#[should_panic(expected = "URL parse failed: UnexpectedFormat")] -//#[test] -//fn absolute_windows_path() { -// let test_url = "c:\\project-name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: None, -// //name: "project-name".to_string(), -// //owner: None, -// //organization: None, -// //fullname: "project-name".to_string(), -// scheme: Some(Scheme::File), -// user: None, -// token: None, -// port: None, -// path: "c:\\project-name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: false, -// print_scheme: false, -// }; -// -// assert_eq!(parsed, expected); -//} -// -//// Move test -////#[test] -////fn ssh_user_path_not_acctname_reponame_format() { -//// let test_url = "git@test.com:repo"; -//// let e = GitUrl::parse(test_url); -//// -//// assert!(e.is_err()); -//// assert_eq!( -//// format!("{}", e.err().unwrap()), -//// "Git Url not in expected format" -//// ); -////} -// -//// Move test -////#[test] -////fn ssh_without_organization() { -//// let test_url = "ssh://f589726c3611:29418/repo"; -//// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -//// let expected = GitUrl { -//// host: Some("f589726c3611".to_string()), -//// //name: "repo".to_string(), -//// //owner: Some("repo".to_string()), -//// //organization: None, -//// //fullname: "repo/repo".to_string(), -//// scheme: Some(Scheme::Ssh), -//// user: None, -//// token: None, -//// port: Some(29418), -//// path: "repo".to_string(), -//// //git_suffix: false, -//// //scheme_prefix: true, -//// print_scheme: true, -//// }; -//// -//// assert_eq!(parsed, expected); -////} -// + //#[test] //fn empty_path() { // assert_eq!( @@ -402,40 +291,30 @@ fn 
ssh_user_ports() { // GitUrl::parse("file://").unwrap_err() // ) //} -// -//#[test] -//fn bad_port_number() { -// let test_url = "https://github.com:crypto-browserify/browserify-rsa.git"; -// let e = GitUrl::parse(test_url); -// -// assert!(e.is_err()); -// assert_eq!( -// format!("{}", e.err().unwrap()), -// "Error from Url crate: invalid port number" -// ); -//} -// -//// This test might not have a use anymore if we're not expanding "git:" -> "git://" -//#[test] -//fn git() { -// let test_url = "git://github.com/owner/name.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("github.com".to_string()), -// //name: "name".to_string(), -// //owner: Some("owner".to_string()), -// //organization: None, -// //fullname: "owner/name".to_string(), -// scheme: Some(Scheme::Git), -// user: None, -// token: None, -// port: None, -// path: "/owner/name.git".to_string(), -// //git_suffix: true, -// //scheme_prefix: true, -// print_scheme: true, -// }; -// -// assert_eq!(parsed, expected); -//} -// + +#[test] +fn bad_port_number() { + let test_url = "https://github.com:crypto-browserify/browserify-rsa.git"; + let e = GitUrl::parse(test_url); + + assert!(e.is_err()); + assert_eq!( + format!("{}", e.err().unwrap()), + "Error from Url crate: invalid port number" + ); +} + +// This test might not have a use anymore if we're not expanding "git:" -> "git://" +#[test] +fn git() { + let test_url = "git://github.com/owner/name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let expected = GitUrlBuilder::default() + .scheme(Some(Scheme::Git)) + .host(Some(String::from("github.com"))) + .path(String::from("/owner/name.git")) + .print_scheme(true) + .build() + .unwrap(); + assert_eq!(parsed, expected); +} From a1d3163fa1f551f4b656a2eaaa13dc0c1c537c62 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Mon, 18 Aug 2025 19:21:37 -0700 Subject: [PATCH 07/32] Tuning getters and setters Refactoring into separate files --- Cargo.toml | 1 + src/lib.rs | 1028 +------------------------------------------ src/provider/mod.rs | 3 + src/types/error.rs | 47 ++ src/types/mod.rs | 755 +++++++++++++++++++++++++++++++ tests/parse.rs | 123 +++--- 6 files changed, 870 insertions(+), 1087 deletions(-) create mode 100644 src/provider/mod.rs create mode 100644 src/types/error.rs create mode 100644 src/types/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 064e24e..0d78ed1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ thiserror = "2" nom = "8" derive_builder = "0.20" +getset = "0.1.6" [dev-dependencies] env_logger = "0.11" diff --git a/src/lib.rs b/src/lib.rs index d9857d9..3f789d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,1028 +1,6 @@ -use core::str; -use derive_builder::Builder; -use std::fmt; -use std::str::FromStr; -use strum::{Display, EnumString, VariantNames}; -use thiserror::Error; -//use url::Url; - -use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_until, take_while}; -use nom::character::complete::one_of; -use nom::sequence::{preceded, terminated}; -use nom::{IResult, Parser, combinator::opt, combinator::rest}; +mod provider; +mod types; +pub use types::{GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, Scheme}; #[cfg(feature = "tracing")] use tracing::debug; - -/// Supported uri schemes for parsing -#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] -#[strum(serialize_all = "kebab_case")] -pub enum Scheme { - /// Represents `file://` url scheme - File, - /// Represents `ftp://` url scheme - Ftp, - /// Represents `ftps://` url scheme - Ftps, - /// Represents `git://` url scheme - Git, - /// Represents `git+ssh://` url scheme - #[strum(serialize = "git+ssh")] - GitSsh, - /// Represents `http://` url scheme - Http, - /// Represents `https://` url scheme - Https, - /// Represents `ssh://` url 
scheme - Ssh, - ///// Represents No url scheme - //Unspecified, - /// - Other(String), // todo: need test for this -} - -#[derive(Clone, Debug, Default, PartialEq, Eq)] -enum GitUrlParseHint { - #[default] - Unknown, - Sshlike, - Filelike, - Httplike, - //Custom // needed? -} - -#[derive(Debug, Default, Clone)] -struct GitUrlBuilderOld { - hint: GitUrlParseHint, - scheme: Option, - user: Option, - token: Option, - host: Option, - port: Option, - path: String, - print_scheme: bool, - //working_url: String, -} - -impl GitUrlBuilder { - fn prebuild_check(&self) -> Result<(), String> { - #[cfg(feature = "tracing")] - debug!("Processing: {:?}", &url); - - // Error if there are null bytes within the url - - // https://github.com/tjtelan/git-url-parse-rs/issues/16 - //if let Some(Some(host)) = &self.host { - // if host.contains('\0') { - // return Err(GitUrlParseError::FoundNullBytes.to_string()); - // } - - // if host.is_empty() { - // return Err( - // GitUrlParseError::UnexpectedEmptyValue(String::from("host")).to_string() - // ); - // } - //} - - //if let Some(Some(user)) = &self.user { - // if user.contains('\0') { - // return Err(GitUrlParseError::FoundNullBytes.to_string()); - // } - - // if user.is_empty() { - // return Err( - // GitUrlParseError::UnexpectedEmptyValue(String::from("user")).to_string() - // ); - // } - //} - - //if let Some(Some(token)) = &self.token { - // if token.contains('\0') { - // return Err(GitUrlParseError::FoundNullBytes.to_string()); - // } - - // if token.is_empty() { - // return Err( - // GitUrlParseError::UnexpectedEmptyValue(String::from("token")).to_string(), - // ); - // } - //} - - //if let Some(path) = &self.path { - // if path.contains('\0') { - // return Err(GitUrlParseError::FoundNullBytes.to_string()); - // } - // if path.is_empty() { - // return Err( - // GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() - // ); - // } - //} - - Ok(()) - } - - fn parse(url: &str) -> Result { - println!("start: {url}"); 
- let mut giturl = GitUrlBuilder::default(); - let mut working_url = url; - let mut hint = GitUrlParseHint::default(); - - //working_url = giturl.parse_scheme(&working_url); - giturl.parse_scheme(&mut working_url, &mut hint); - //working_url = giturl.parse_auth_info(&working_url); - giturl.parse_auth_info(&mut working_url, &mut hint); - let save_state = working_url.clone(); - - //working_url = giturl.parse_host_port(&working_url); - giturl.parse_host_port(&mut working_url, &mut hint); - - match hint { - GitUrlParseHint::Httplike => {} - GitUrlParseHint::Sshlike => { - //working_url = giturl.parse_ssh_path(&working_url); - giturl.parse_ssh_path(&mut working_url, &mut hint); - } - GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { - working_url = save_state; - giturl.host(None); - //giturl.host = None; - giturl.scheme(Scheme::File); - //giturl.scheme = Some(Scheme::File); - } - } - - //(giturl, _) = parse_path(giturl, working_url)?; - giturl.parse_path(&mut working_url, &mut hint); - - println!(""); - Ok(giturl) - } - - fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - - if let Ok((leftover, Some(s))) = GitUrlBuilder::_parse_scheme(working_url) { - println!("leftover: {leftover}, scheme: {s:?}"); - - let scheme = Scheme::from_str(s).expect("Unknown scheme"); - - // todo: pass in hint! 
- *hint = match &scheme { - Scheme::Ssh => GitUrlParseHint::Sshlike, - Scheme::File => GitUrlParseHint::Filelike, - _ => GitUrlParseHint::Httplike, - }; - - builder.scheme(Some(scheme)); - //builder.scheme = Some(scheme); - builder.print_scheme(true); - //builder.print_scheme = true; - //working_url = leftover; - - *self = builder; - *working_url = leftover; - } - //else { - // working_url - //} - } - - fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - if let Ok((leftover, Some(username))) = GitUrlBuilder::_parse_username(working_url) { - println!("leftover: {leftover}, username: {username:?}"); - //builder.user = Some(username.to_string()); - builder.user(Some(username.to_string())); - - //working_url = leftover; - - if *hint == GitUrlParseHint::Unknown { - *hint = GitUrlParseHint::Sshlike; - } - - if let Ok((token, Some(real_username))) = GitUrlBuilder::_parse_token(username) { - println!("token: {token}, real_username: {real_username:?}"); - //builder.user = Some(real_username.to_string()); - builder.user(Some(real_username.to_string())); - //builder.token = Some(token.to_string()); - builder.token(Some(token.to_string())); - - if *hint == GitUrlParseHint::Unknown || *hint == GitUrlParseHint::Sshlike { - *hint = GitUrlParseHint::Httplike; - } - } - - *working_url = leftover; - *self = builder; - //(builder, leftover) - } - //else { - // (builder, working_url) - //} - } - - fn parse_host_port(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - let mut save = working_url.clone(); - - if let Ok((leftover, Some(hostname))) = GitUrlBuilder::_parse_hostname(save) { - println!("leftover {leftover}, hostname: {hostname}"); - //builder.host = Some(hostname.to_string()); - builder.host(Some(hostname.to_string())); - //working_url = leftover; - save = leftover; - } - - if let Ok((leftover, Some(port))) = GitUrlBuilder::_parse_port(save) { - if 
!port.is_empty() { - println!("leftover {leftover}, port: {port}"); - //builder.port = Some(u16::from_str(port).expect("Not a valid port")); - builder.port(Some(u16::from_str(port).expect("Not a valid port"))); - //working_url = leftover; - save = leftover; - - if *hint == GitUrlParseHint::Unknown { - *hint = GitUrlParseHint::Httplike; - } - } - } - - *self = builder; - *working_url = save; - - //(builder, save) - } - - fn parse_ssh_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports - //if let Some(ssh_path) = working_url.strip_prefix(":") { - //} - - if let Ok((_leftover, Some(path))) = GitUrlBuilder::_parse_ssh_path(working_url) { - //working_url = path; - // This is important for printing the url correctly with the ":" - //builder.scheme = Some(Scheme::Ssh); - builder.scheme(Some(Scheme::Ssh)); - - *self = builder; - *working_url = path; - //(builder, path) - } - //else { - // (builder, working_url) - //} - } - - fn parse_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - if let Ok((leftover, path)) = GitUrlBuilder::_parse_path(working_url) { - println!("leftover {leftover}, path: {path}"); - //if path.is_empty() { - // return Err(GitUrlParseError::UnexpectedEmptyValue(String::from("path"))); - //} - - //builder.path = path.to_string(); - builder.path(path.to_string()); - - *self = builder; - *working_url = leftover; - //Ok((builder.clone(), leftover)) - } - //else { - // Ok((builder.clone(), working_url)) - //} - } - - //// - - fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated( - alt(( - tag(Scheme::File.to_string().as_bytes()), - tag(Scheme::Ftps.to_string().as_bytes()), - tag(Scheme::Ftp.to_string().as_bytes()), - tag(Scheme::GitSsh.to_string().as_bytes()), - tag(Scheme::Git.to_string().as_bytes()), - 
tag(Scheme::Https.to_string().as_bytes()), - tag(Scheme::Http.to_string().as_bytes()), - tag(Scheme::Ssh.to_string().as_bytes()), - // todo: Other(), needs a test - )), - tag("://"), - )) - .parse(input) - } - - fn _parse_username(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated(take_until("@"), tag("@"))).parse(input) - } - - fn _parse_token(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated(take_until(":"), tag(":"))).parse(input) - } - - fn _parse_hostname(input: &str) -> IResult<&str, Option<&str>> { - opt(take_till(|c| c == '/' || c == ':')).parse(input) - } - - fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) - } - - // This is making an assumption that the path is relative, not absolute - // This is bc we do not support absolute paths when we also have a port - fn _parse_ssh_path(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(one_of("/:"), rest)).parse(input) - } - - fn _parse_path(input: &str) -> IResult<&str, &str> { - rest(input) - } -} - -/// GitUrl represents an input url that is a url used by git -/// Internally during parsing the url is sanitized and uses the `url` crate to perform -/// the majority of the parsing effort, and with some extra handling to expose -/// metadata used my many git hosting services -#[derive(Debug, PartialEq, Eq, Clone, Builder)] -#[builder(build_fn(validate = "Self::prebuild_check"))] -pub struct GitUrl { - /// The fully qualified domain name (FQDN) or IP of the repo - #[builder(setter(into), default)] - host: Option, - ///// The name of the repo - //pub name: String, - ///// The owner/account/project name - //pub owner: Option, - ///// The organization name. 
Supported by Azure DevOps - //pub organization: Option, - ///// The full name of the repo, formatted as "owner/name" - //pub fullname: String, - ///// The git url scheme - #[builder(setter(into), default)] - scheme: Option, - /// The authentication user - #[builder(setter(into), default)] - user: Option, - /// The oauth token (could appear in the https urls) - #[builder(setter(into), default)] - token: Option, - /// The non-conventional port where git service is hosted - #[builder(setter(into), default)] - port: Option, - /// The path to repo w/ respect to user + hostname - #[builder(setter(into))] - path: String, - ///// Indicate if url uses the .git suffix - //pub git_suffix: bool, - ///// Indicate if url explicitly uses its scheme - //pub scheme_prefix: bool, - #[builder(default)] - print_scheme: bool, -} - -/// Build the printable GitUrl from its components -impl fmt::Display for GitUrl { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - //let scheme_prefix = match self.scheme_prefix { - // true => format!("{}://", self.scheme), - // false => String::new(), - //}; - - let scheme = if let Some(scheme) = &self.scheme - && self.print_scheme - { - format!("{}://", scheme) - } else { - String::new() - }; - - //let scheme_prefix = if self.print_scheme && self.scheme_prefix { - // format!("{}://", self.scheme) - //} else { - // String::new() - //}; - - let auth_info = match self.scheme { - Some(Scheme::Ssh) | Some(Scheme::Git) | Some(Scheme::GitSsh) => { - if let Some(user) = &self.user { - format!("{user}@") - } else { - String::new() - } - } - Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user, &self.token) { - (Some(user), Some(token)) => format!("{user}:{token}@"), - (Some(user), None) => format!("{user}@",), - (None, Some(token)) => format!("{token}@"), - (None, None) => String::new(), - }, - _ => String::new(), - }; - - let host = match &self.host { - Some(host) => host.to_string(), - None => String::new(), - }; - - let port = match 
&self.port { - Some(p) => format!(":{}", p), - None => String::new(), - }; - - ////let path = match &self.scheme { - //// Scheme::Ssh => { - //// if self.port.is_some() { - //// format!("/{}", &self.path) - //// } else { - //// format!(":{}", &self.path) - //// } - //// } - //// _ => self.path.to_string(), - ////}; - - let path = if self.scheme == Some(Scheme::Ssh) { - if self.port.is_some() { - if !self.path.starts_with('/') { - format!("/{}", &self.path) - } else { - self.path.to_string() - } - } else { - format!(":{}", &self.path) - } - } else { - self.path.to_string() - }; - - let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); - - write!(f, "{}", git_url_str) - } -} - -impl Default for GitUrl { - fn default() -> Self { - GitUrl { - host: None, - //name: "".to_string(), - //owner: None, - //organization: None, - //fullname: "".to_string(), - scheme: None, - user: None, - token: None, - port: None, - path: "".to_string(), - //git_suffix: false, - //scheme_prefix: false, - print_scheme: false, - } - } -} - -impl FromStr for GitUrl { - //type Err = GitUrlParseError; - type Err = GitUrlBuilderError; - - fn from_str(s: &str) -> Result { - GitUrl::parse(s) - } -} - -impl GitUrl { - /// Returns `GitUrl` after removing `user` and `token` values - /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info - pub fn trim_auth(&self) -> GitUrl { - let mut new_giturl = self.clone(); - new_giturl.user = None; - new_giturl.token = None; - new_giturl - } - - /// Returns a `Result` after normalizing and parsing `url` for metadata - pub fn parse(url: &str) -> Result { - println!("start: {url}"); - let mut giturl = GitUrlBuilder::parse(url).unwrap(); - //let mut working_url = url; - - //(giturl, working_url) = parse_scheme(giturl, &working_url); - //(giturl, working_url) = parse_auth_info(giturl, working_url); - - //let save_state = working_url; - - //(giturl, working_url) = parse_host_port(giturl, working_url); - - //match 
giturl.hint { - // GitUrlParseHint::Httplike => {} - // GitUrlParseHint::Sshlike => { - // (giturl, working_url) = parse_ssh_path(giturl, working_url); - // } - // GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { - // working_url = save_state; - // giturl.host = None; - // giturl.scheme = Some(Scheme::File); - // } - //} - - //(giturl, _) = parse_path(giturl, working_url)?; - - //println!(""); - giturl.build() - //// Normalize the url so we can use Url crate to process ssh urls - //let normalized = normalize_url(url)?; - - //// Some pre-processing for paths - //let scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { - // scheme - //} else { - // return Err(GitUrlParseError::UnsupportedScheme( - // normalized.scheme().to_string(), - // )); - //}; - //if normalized.path().is_empty() { - // return Err(GitUrlParseError::EmptyPath); - //} - - //// Normalized ssh urls can always have their first '/' removed - //let urlpath = match &scheme { - // Scheme::Ssh => { - // // At the moment, we're relying on url::Url's parse() behavior to not duplicate - // // the leading '/' when we normalize - // normalized.path()[1..].to_string() - // } - // _ => normalized.path().to_string(), - //}; - - //let git_suffix_check = &urlpath.ends_with(".git"); - - //// Parse through path for name,owner,organization - //// Support organizations for Azure Devops - //#[cfg(feature = "tracing")] - //debug!("The urlpath: {:?}", &urlpath); - - //// Most git services use the path for metadata in the same way, so we're going to separate - //// the metadata - //// ex. 
github.com/accountname/reponame - //// owner = accountname - //// name = reponame - //// - //// organizations are going to be supported on a per-host basis - //let splitpath = &urlpath.rsplit_terminator('/').collect::>(); - - //#[cfg(feature = "tracing")] - //debug!("rsplit results for metadata: {:?}", splitpath); - - //let name = splitpath[0].trim_end_matches(".git").to_string(); - - //// TODO: I think here is where we want to update the url pattern identification step.. I want to be able to have a hint that the user can pass - - //let (owner, organization, fullname) = match &scheme { - // // We're not going to assume anything about metadata from a filepath - // Scheme::File => (None::, None::, name.clone()), - // _ => { - // let mut fullname: Vec<&str> = Vec::new(); - - // // TODO: Add support for parsing out orgs from these urls - // let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; - // //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; - - // let host_str = if let Some(host) = normalized.host_str() { - // host - // } else { - // return Err(GitUrlParseError::UnsupportedUrlHostFormat); - // }; - - // match hosts_w_organization_in_path.contains(&host_str) { - // true => { - // #[cfg(feature = "tracing")] - // debug!("Found a git provider with an org"); - - // // The path differs between git:// and https:// schemes - - // match &scheme { - // // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", - // Scheme::Ssh => { - // // Organization - // fullname.push(splitpath[2]); - // // Project/Owner name - // fullname.push(splitpath[1]); - // // Repo name - // fullname.push(splitpath[0]); - - // ( - // Some(splitpath[1].to_string()), - // Some(splitpath[2].to_string()), - // fullname.join("/"), - // ) - // } - // // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", - // Scheme::Https => { - // // Organization - // fullname.push(splitpath[3]); - // // Project/Owner name - // 
fullname.push(splitpath[2]); - // // Repo name - // fullname.push(splitpath[0]); - - // ( - // Some(splitpath[2].to_string()), - // Some(splitpath[3].to_string()), - // fullname.join("/"), - // ) - // } - - // // TODO: I'm not sure if I want to support throwing this error long-term - // _ => return Err(GitUrlParseError::UnexpectedScheme), - // } - // } - // false => { - // if !url.starts_with("ssh") && splitpath.len() < 2 { - // return Err(GitUrlParseError::UnexpectedFormat); - // } - - // let position = match splitpath.len() { - // 0 => return Err(GitUrlParseError::UnexpectedFormat), - // 1 => 0, - // _ => 1, - // }; - - // // push owner - // fullname.push(splitpath[position]); - // // push name - // fullname.push(name.as_str()); - - // ( - // Some(splitpath[position].to_string()), - // None::, - // fullname.join("/"), - // ) - // } - // } - // } - //}; - - //let final_host = match scheme { - // Scheme::File => None, - // _ => normalized.host_str().map(|h| h.to_string()), - //}; - - //let final_path = match scheme { - // Scheme::File => { - // if let Some(host) = normalized.host_str() { - // format!("{}{}", host, urlpath) - // } else { - // urlpath - // } - // } - // _ => urlpath, - //}; - - //Ok(GitUrl { - // host: final_host, - // name, - // owner, - // organization, - // fullname, - // scheme, - // user: match normalized.username().to_string().len() { - // 0 => None, - // _ => Some(normalized.username().to_string()), - // }, - // token: normalized.password().map(|p| p.to_string()), - // port: normalized.port(), - // path: final_path, - // git_suffix: *git_suffix_check, - // scheme_prefix: url.contains("://") || url.starts_with("git:"), - //}) - } -} - -// start of old - -//fn parse_scheme<'a>(giturl: GitUrlBuilderOld, working_url: &'a str) -> (GitUrlBuilderOld, &'a str) { -// let mut builder = giturl.clone(); -// -// if let Ok((leftover, Some(s))) = GitUrlBuilderOld::scheme(working_url) { -// println!("leftover: {leftover}, scheme: {s:?}"); -// -// let scheme = 
Scheme::from_str(s).expect("Unknown scheme"); -// -// builder.hint = match &scheme { -// Scheme::Ssh => GitUrlParseHint::Sshlike, -// Scheme::File => GitUrlParseHint::Filelike, -// _ => GitUrlParseHint::Httplike, -// }; -// -// builder.scheme = Some(scheme); -// builder.print_scheme = true; -// //working_url = leftover; -// -// (builder, leftover) -// } else { -// (builder, working_url) -// } -//} -// -//fn parse_auth_info<'a>( -// giturl: GitUrlBuilderOld, -// working_url: &'a str, -//) -> (GitUrlBuilderOld, &'a str) { -// let mut builder = giturl.clone(); -// if let Ok((leftover, Some(username))) = GitUrlBuilderOld::username(working_url) { -// println!("leftover: {leftover}, username: {username:?}"); -// builder.user = Some(username.to_string()); -// -// //working_url = leftover; -// -// if builder.hint == GitUrlParseHint::Unknown { -// builder.hint = GitUrlParseHint::Sshlike; -// } -// -// if let Ok((token, Some(real_username))) = GitUrlBuilderOld::token(username) { -// println!("token: {token}, real_username: {real_username:?}"); -// builder.user = Some(real_username.to_string()); -// builder.token = Some(token.to_string()); -// -// if builder.hint == GitUrlParseHint::Unknown || builder.hint == GitUrlParseHint::Sshlike -// { -// builder.hint = GitUrlParseHint::Httplike; -// } -// } -// -// (builder, leftover) -// } else { -// (builder, working_url) -// } -//} -//fn parse_host_port<'a>( -// giturl: GitUrlBuilderOld, -// working_url: &'a str, -//) -> (GitUrlBuilderOld, &'a str) { -// let mut builder = giturl.clone(); -// let mut save = working_url; -// -// if let Ok((leftover, Some(hostname))) = GitUrlBuilderOld::hostname(working_url) { -// println!("leftover {leftover}, hostname: {hostname}"); -// builder.host = Some(hostname.to_string()); -// //working_url = leftover; -// save = leftover; -// } -// -// if let Ok((leftover, Some(port))) = GitUrlBuilderOld::port(save) { -// if !port.is_empty() { -// println!("leftover {leftover}, port: {port}"); -// builder.port 
= Some(u16::from_str(port).expect("Not a valid port")); -// //working_url = leftover; -// save = leftover; -// -// if builder.hint == GitUrlParseHint::Unknown { -// builder.hint = GitUrlParseHint::Httplike; -// } -// } -// } -// -// (builder, save) -//} -// -//fn parse_ssh_path<'a>( -// giturl: GitUrlBuilderOld, -// working_url: &'a str, -//) -> (GitUrlBuilderOld, &'a str) { -// let mut builder = giturl.clone(); -// // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports -// //if let Some(ssh_path) = working_url.strip_prefix(":") { -// //} -// -// if let Ok((_leftover, Some(path))) = GitUrlBuilderOld::ssh_path(working_url) { -// //working_url = path; -// // This is important for printing the url correctly with the ":" -// builder.scheme = Some(Scheme::Ssh); -// (builder, path) -// } else { -// (builder, working_url) -// } -//} -// -//fn parse_path<'a>( -// giturl: GitUrlBuilderOld, -// working_url: &'a str, -//) -> Result<(GitUrlBuilderOld, &'a str), GitUrlParseError> { -// let mut builder = giturl.clone(); -// if let Ok((leftover, path)) = GitUrlBuilderOld::path(working_url) { -// println!("leftover {leftover}, path: {path}"); -// if path.is_empty() { -// return Err(GitUrlParseError::UnexpectedEmptyValue(String::from("path"))); -// } -// -// builder.path = path.to_string(); -// Ok((builder.clone(), leftover)) -// } else { -// Ok((builder.clone(), working_url)) -// } -//} - -// end of old - -/// `normalize_ssh_url` takes in an ssh url that separates the login info -/// from the path into with a `:` and replaces it with `/`. 
-/// -/// Prepends `ssh://` to url -/// -/// Supports absolute and relative paths -//fn normalize_ssh_url(url: &str) -> Result { -// let u = url.split(':').collect::>(); -// -// match u.len() { -// 2 => { -// #[cfg(feature = "tracing")] -// debug!("Normalizing ssh url: {:?}", u); -// normalize_url(&format!("ssh://{}/{}", u[0], u[1])) -// } -// 3 => { -// #[cfg(feature = "tracing")] -// debug!("Normalizing ssh url with ports: {:?}", u); -// normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2])) -// } -// _default => Err(GitUrlParseError::UnsupportedSshUrlFormat), -// } -//} - -/// `normalize_file_path` takes in a filepath and uses `Url::from_file_path()` to parse -/// -/// Prepends `file://` to url -//#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] -//fn normalize_file_path(filepath: &str) -> Result { -// let fp = Url::from_file_path(filepath); -// -// match fp { -// Ok(path) => Ok(path), -// Err(_e) => { -// if let Ok(file_url) = normalize_url(&format!("file://{}", filepath)) { -// Ok(file_url) -// } else { -// Err(GitUrlParseError::FileUrlNormalizeFailedSchemeAdded) -// } -// } -// } -//} - -#[cfg(target_arch = "wasm32")] -fn normalize_file_path(_filepath: &str) -> Result { - unreachable!() -} - -/// `normalize_url` takes in url as `&str` and takes an opinionated approach to identify -/// `ssh://` or `file://` urls that require more information to be added so that -/// they can be parsed more effectively by `url::Url::parse()` -//pub fn normalize_url(url: &str) -> Result { -// #[cfg(feature = "tracing")] -// debug!("Processing: {:?}", &url); -// -// // TODO: Should this be extended to check for any whitespace? 
-// // Error if there are null bytes within the url -// // https://github.com/tjtelan/git-url-parse-rs/issues/16 -// if url.contains('\0') { -// return Err(GitUrlParseError::FoundNullBytes); -// } -// -// // We're going to remove any trailing slash before running through Url::parse -// let trim_url = url.trim_end_matches('/'); -// -// // TODO: Remove support for this form when I go to next major version. -// // I forget what it supports, and it isn't obvious after searching for examples -// // normalize short git url notation: git:host/path -// let url_to_parse = if trim_url.starts_with("git:") && !trim_url.starts_with("git://") { -// trim_url.replace("git:", "git://") -// } else { -// trim_url.to_string() -// }; -// -// let url_parse = Url::parse(&url_to_parse); -// -// Ok(match url_parse { -// Ok(u) => { -// match Scheme::from_str(u.scheme()) { -// Ok(_p) => u, -// Err(_e) => { -// // Catch case when an ssh url is given w/o a user -// #[cfg(feature = "tracing")] -// debug!("Scheme parse fail. Assuming a userless ssh url"); -// if let Ok(ssh_url) = normalize_ssh_url(trim_url) { -// ssh_url -// } else { -// return Err(GitUrlParseError::SshUrlNormalizeFailedNoScheme); -// } -// } -// } -// } -// -// // If we're here, we're only looking for Scheme::Ssh or Scheme::File -// // TODO: Add test for this -// Err(url::ParseError::RelativeUrlWithoutBase) => { -// // Assuming we have found Scheme::Ssh if we can find an "@" before ":" -// // Otherwise we have Scheme::File -// //let re = Regex::new(r"^\S+(@)\S+(:).*$").with_context(|| { -// // "Failed to build ssh git url regex for testing against url".to_string() -// //})?; -// -// match is_ssh_url(trim_url) { -// true => { -// #[cfg(feature = "tracing")] -// debug!("Scheme::SSH match for normalization"); -// normalize_ssh_url(trim_url)? -// } -// false => { -// #[cfg(feature = "tracing")] -// debug!("Scheme::File match for normalization"); -// normalize_file_path(trim_url)? 
-// } -// } -// } -// Err(err) => { -// return Err(GitUrlParseError::from(err)); -// } -// }) -//} - -// Valid ssh `url` for cloning have a usernames, -// but we don't require it classification or parsing purposes -// However a path must be specified with a `:` -//fn is_ssh_url(url: &str) -> bool { -// // if we do not have a path -// if !url.contains(':') { -// return false; -// } -// -// // if we have a username, expect it before the path (Are usernames with colons valid?) -// if let (Some(at_pos), Some(colon_pos)) = (url.find('@'), url.find(':')) { -// if colon_pos < at_pos { -// return false; -// } -// -// // Make sure we provided a username, and not just `@` -// let parts: Vec<&str> = url.split('@').collect(); -// return parts.len() == 2 || parts[0].is_empty(); -// } -// -// // it's an ssh url if we have a domain:path pattern -// let parts: Vec<&str> = url.split(':').collect(); -// -// // FIXME: I am not sure how to validate a url with a port -// //if parts.len() != 3 && !parts[0].is_empty() && !parts[1].is_empty() && !parts[2].is_empty() { -// // return false; -// //} -// -// // This should also handle if a port is specified -// // no port example: ssh://user@domain:path/to/repo.git -// // port example: ssh://user@domain:port/path/to/repo.git -// parts.len() == 2 && parts[0].is_empty() && parts[1].is_empty() -//} - -#[derive(Error, Debug, PartialEq, Eq)] -pub enum GitUrlParseError { - //#[error("Error from derive_builder")] - //DeriveBuilderError(#[from] derive_builder::UninitializedFieldError), - - //#[error("Error from Url crate: {0}")] - //UrlParseError(#[from] url::ParseError), - - //#[error("No url scheme was found, then failed to normalize as ssh url.")] - //SshUrlNormalizeFailedNoScheme, - - //#[error("No url scheme was found, then failed to normalize as ssh url after adding 'ssh://'")] - //SshUrlNormalizeFailedSchemeAdded, - - //#[error("Failed to normalize as ssh url after adding 'ssh://'")] - //SshUrlNormalizeFailedSchemeAddedWithPorts, - - 
//#[error("No url scheme was found, then failed to normalize as file url.")] - //FileUrlNormalizeFailedNoScheme, - - //#[error("No url scheme was found, then failed to normalize as file url after adding 'file://'")] - //FileUrlNormalizeFailedSchemeAdded, - - //#[error("Git Url not in expected format")] - //UnexpectedFormat, - - // FIXME: Keep an eye on this error for removal - #[error("Git Url for host using unexpected scheme")] - UnexpectedScheme, - - //#[error("Scheme unsupported: {0}")] - //UnsupportedScheme(String), - //#[error("Host from Url cannot be str or does not exist")] - //UnsupportedUrlHostFormat, - //#[error("Git Url not in expected format for SSH")] - //UnsupportedSshUrlFormat, - //#[error("Normalized URL has no path")] - //EmptyPath, - #[error("Found null bytes within input url before parsing")] - FoundNullBytes, - - #[error("Value expected for field: {0}")] - UnexpectedEmptyValue(String), -} diff --git a/src/provider/mod.rs b/src/provider/mod.rs new file mode 100644 index 0000000..db356dd --- /dev/null +++ b/src/provider/mod.rs @@ -0,0 +1,3 @@ +// generic +// gitlab (subgroups) style +// azure devops diff --git a/src/types/error.rs b/src/types/error.rs new file mode 100644 index 0000000..81c7657 --- /dev/null +++ b/src/types/error.rs @@ -0,0 +1,47 @@ +use thiserror::Error; + +#[derive(Error, Debug, PartialEq, Eq)] +pub enum GitUrlParseError { + //#[error("Error from derive_builder")] + //DeriveBuilderError(#[from] derive_builder::UninitializedFieldError), + + //#[error("Error from Url crate: {0}")] + //UrlParseError(#[from] url::ParseError), + + //#[error("No url scheme was found, then failed to normalize as ssh url.")] + //SshUrlNormalizeFailedNoScheme, + + //#[error("No url scheme was found, then failed to normalize as ssh url after adding 'ssh://'")] + //SshUrlNormalizeFailedSchemeAdded, + + //#[error("Failed to normalize as ssh url after adding 'ssh://'")] + //SshUrlNormalizeFailedSchemeAddedWithPorts, + + //#[error("No url scheme was found, 
then failed to normalize as file url.")] + //FileUrlNormalizeFailedNoScheme, + + //#[error("No url scheme was found, then failed to normalize as file url after adding 'file://'")] + //FileUrlNormalizeFailedSchemeAdded, + + //#[error("Git Url not in expected format")] + //UnexpectedFormat, + + // FIXME: Keep an eye on this error for removal + #[error("Git Url for host using unexpected scheme")] + UnexpectedScheme, + + //#[error("Scheme unsupported: {0}")] + //UnsupportedScheme(String), + //#[error("Host from Url cannot be str or does not exist")] + //UnsupportedUrlHostFormat, + //#[error("Git Url not in expected format for SSH")] + //UnsupportedSshUrlFormat, + //#[error("Normalized URL has no path")] + //EmptyPath, + #[error("Found null bytes within input url before parsing")] + FoundNullBytes, + + // Maybe remove this. Handled by derive_builder + #[error("Value expected for field: {0}")] + UnexpectedEmptyValue(String), +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..dc7fe47 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,755 @@ +mod error; +pub use error::GitUrlParseError; + +use derive_builder::Builder; +use getset::{Getters, Setters}; +use strum::{Display, EnumString, VariantNames}; + +use core::str; +use std::fmt; +use std::str::FromStr; +//use url::Url; + +use nom::branch::alt; +use nom::bytes::complete::{tag, take_till, take_until, take_while}; +use nom::character::complete::one_of; +use nom::sequence::{preceded, terminated}; +use nom::{IResult, Parser, combinator::opt, combinator::rest}; + +/// Supported uri schemes for parsing +#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] +#[strum(serialize_all = "kebab_case")] +pub enum Scheme { + /// Represents `file://` url scheme + File, + /// Represents `ftp://` url scheme + Ftp, + /// Represents `ftps://` url scheme + Ftps, + /// Represents `git://` url scheme + Git, + /// Represents `git+ssh://` url scheme + #[strum(serialize = "git+ssh")] + GitSsh, 
+ /// Represents `http://` url scheme + Http, + /// Represents `https://` url scheme + Https, + /// Represents `ssh://` url scheme + Ssh, + ///// Represents No url scheme + //Unspecified, + /// + Other(String), // todo: need test for this +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) enum GitUrlParseHint { + #[default] + Unknown, + Sshlike, + Filelike, + Httplike, + //Custom // needed? +} + +/// GitUrl represents an input url that is a url used by git +/// Internally during parsing the url is sanitized and uses the `url` crate to perform +/// the majority of the parsing effort, and with some extra handling to expose +/// metadata used my many git hosting services +#[derive(Debug, PartialEq, Eq, Clone, Builder, Default, Getters, Setters)] +#[builder(build_fn(validate = "Self::prebuild_check"), field(public))] +#[get = "pub"] +pub struct GitUrl { + /// The fully qualified domain name (FQDN) or IP of the repo + #[builder(setter(into, strip_option), default)] + host: Option, + ///// The name of the repo + //pub name: String, + ///// The owner/account/project name + //pub owner: Option, + ///// The organization name. 
Supported by Azure DevOps + //pub organization: Option, + ///// The full name of the repo, formatted as "owner/name" + //pub fullname: String, + ///// The git url scheme + #[builder(setter(into, strip_option), default)] + scheme: Option, + /// The authentication user + #[builder(setter(into, strip_option), default)] + #[getset(set = "pub(crate)")] + user: Option, + /// The oauth token (could appear in the https urls) + #[builder(setter(into, strip_option), default)] + #[getset(set = "pub(crate)")] + token: Option, + /// The non-conventional port where git service is hosted + #[builder(setter(into, strip_option), default)] + port: Option, + /// The path to repo w/ respect to user + hostname + #[builder(setter(into))] + path: String, + ///// Indicate if url uses the .git suffix + //pub git_suffix: bool, + ///// Indicate if url explicitly uses its scheme + //pub scheme_prefix: bool, + #[builder(default)] + print_scheme: bool, +} + +impl GitUrlBuilder { + pub fn trim_auth(&mut self) { + self.user = None; + self.token = None; + } + + fn prebuild_check(&self) -> Result<(), String> { + #[cfg(feature = "tracing")] + debug!("Processing: {:?}", &url); + + // Error if there are null bytes within the url + + // https://github.com/tjtelan/git-url-parse-rs/issues/16 + if let Some(Some(host)) = &self.host { + if host.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes.to_string()); + } + + if host.is_empty() { + return Err( + GitUrlParseError::UnexpectedEmptyValue(String::from("host")).to_string() + ); + } + } + + if let Some(Some(user)) = &self.user { + if user.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes.to_string()); + } + + if user.is_empty() { + return Err( + GitUrlParseError::UnexpectedEmptyValue(String::from("user")).to_string() + ); + } + } + + if let Some(Some(token)) = &self.token { + if token.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes.to_string()); + } + + if token.is_empty() { + return Err( + 
GitUrlParseError::UnexpectedEmptyValue(String::from("token")).to_string(), + ); + } + } + + if let Some(path) = &self.path { + if path.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes.to_string()); + } + if path.is_empty() { + return Err( + GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() + ); + } + } + + Ok(()) + } + + fn parse(url: &str) -> Result { + println!("start: {url}"); + let mut giturl = GitUrlBuilder::default(); + let mut working_url = url; + let mut hint = GitUrlParseHint::default(); + + giturl.parse_scheme(&mut working_url, &mut hint); + giturl.parse_auth_info(&mut working_url, &mut hint); + let save_state = working_url; + + giturl.parse_host_port(&mut working_url, &mut hint); + + match hint { + GitUrlParseHint::Httplike => {} + GitUrlParseHint::Sshlike => { + //working_url = giturl.parse_ssh_path(&working_url); + giturl.parse_ssh_path(&mut working_url, &mut hint); + } + GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { + working_url = save_state; + giturl.host = None; + giturl.scheme(Scheme::File); + } + } + + giturl.parse_path(&mut working_url, &mut hint); + + println!(""); + Ok(giturl) + } + + fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + + if let Ok((leftover, Some(s))) = GitUrlBuilder::_parse_scheme(working_url) { + println!("leftover: {leftover}, scheme: {s:?}"); + + let scheme = Scheme::from_str(s).expect("Unknown scheme"); + + // todo: pass in hint! 
+ *hint = match &scheme { + Scheme::Ssh => GitUrlParseHint::Sshlike, + Scheme::File => GitUrlParseHint::Filelike, + _ => GitUrlParseHint::Httplike, + }; + + builder.scheme(scheme); + builder.print_scheme(true); + + *self = builder; + *working_url = leftover; + } + } + + fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + if let Ok((leftover, Some(username))) = GitUrlBuilder::_parse_username(working_url) { + println!("leftover: {leftover}, username: {username:?}"); + builder.user(username); + + if *hint == GitUrlParseHint::Unknown { + *hint = GitUrlParseHint::Sshlike; + } + + if let Ok((token, Some(real_username))) = GitUrlBuilder::_parse_token(username) { + println!("token: {token}, real_username: {real_username:?}"); + builder.user(real_username); + builder.token(token); + + if *hint == GitUrlParseHint::Unknown || *hint == GitUrlParseHint::Sshlike { + *hint = GitUrlParseHint::Httplike; + } + } + + *working_url = leftover; + *self = builder; + } + } + + fn parse_host_port(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + let mut save = working_url.clone(); + + if let Ok((leftover, Some(hostname))) = GitUrlBuilder::_parse_hostname(save) { + println!("leftover {leftover}, hostname: {hostname}"); + builder.host(hostname); + save = leftover; + } + + if let Ok((leftover, Some(port))) = GitUrlBuilder::_parse_port(save) { + if !port.is_empty() { + println!("leftover {leftover}, port: {port}"); + builder.port(u16::from_str(port).expect("Not a valid port")); + save = leftover; + + if *hint == GitUrlParseHint::Unknown { + *hint = GitUrlParseHint::Httplike; + } + } + } + + *self = builder; + *working_url = save; + } + + fn parse_ssh_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports + //if let Some(ssh_path) = 
working_url.strip_prefix(":") { + //} + + if let Ok((_leftover, Some(path))) = GitUrlBuilder::_parse_ssh_path(working_url) { + builder.scheme(Scheme::Ssh); + + *self = builder; + *working_url = path; + } + } + + fn parse_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + let mut builder = self.clone(); + if let Ok((leftover, path)) = GitUrlBuilder::_parse_path(working_url) { + println!("leftover {leftover}, path: {path}"); + + builder.path(path); + + *self = builder; + *working_url = leftover; + } + } + + //// + + fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated( + alt(( + tag(Scheme::File.to_string().as_bytes()), + tag(Scheme::Ftps.to_string().as_bytes()), + tag(Scheme::Ftp.to_string().as_bytes()), + tag(Scheme::GitSsh.to_string().as_bytes()), + tag(Scheme::Git.to_string().as_bytes()), + tag(Scheme::Https.to_string().as_bytes()), + tag(Scheme::Http.to_string().as_bytes()), + tag(Scheme::Ssh.to_string().as_bytes()), + // todo: Other(), needs a test + )), + tag("://"), + )) + .parse(input) + } + + fn _parse_username(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated(take_until("@"), tag("@"))).parse(input) + } + + fn _parse_token(input: &str) -> IResult<&str, Option<&str>> { + opt(terminated(take_until(":"), tag(":"))).parse(input) + } + + fn _parse_hostname(input: &str) -> IResult<&str, Option<&str>> { + opt(take_till(|c| c == '/' || c == ':')).parse(input) + } + + fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { + opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) + } + + // This is making an assumption that the path is relative, not absolute + // This is bc we do not support absolute paths when we also have a port + fn _parse_ssh_path(input: &str) -> IResult<&str, Option<&str>> { + opt(preceded(one_of("/:"), rest)).parse(input) + } + + fn _parse_path(input: &str) -> IResult<&str, &str> { + rest(input) + } +} + +/// Build the printable GitUrl from its components 
+impl fmt::Display for GitUrl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + //let scheme_prefix = match self.scheme_prefix { + // true => format!("{}://", self.scheme), + // false => String::new(), + //}; + + let scheme = if let Some(scheme) = &self.scheme() + && self.print_scheme().clone() + { + format!("{}://", scheme) + } else { + String::new() + }; + + //let scheme_prefix = if self.print_scheme && self.scheme_prefix { + // format!("{}://", self.scheme) + //} else { + // String::new() + //}; + + let auth_info = match self.scheme() { + Some(Scheme::Ssh) | Some(Scheme::Git) | Some(Scheme::GitSsh) => { + if let Some(user) = &self.user() { + format!("{user}@") + } else { + String::new() + } + } + Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user(), &self.token()) { + (Some(user), Some(token)) => format!("{user}:{token}@"), + (Some(user), None) => format!("{user}@",), + (None, Some(token)) => format!("{token}@"), + (None, None) => String::new(), + }, + _ => String::new(), + }; + + let host = match &self.host() { + Some(host) => host.to_string(), + None => String::new(), + }; + + let port = match &self.port() { + Some(p) => format!(":{}", p), + None => String::new(), + }; + + let path = if self.scheme().clone() == Some(Scheme::Ssh) { + if self.port().is_some() { + if !self.path().starts_with('/') { + format!("/{}", &self.path()) + } else { + self.path().to_string() + } + } else { + format!(":{}", &self.path()) + } + } else { + self.path().to_string() + }; + + let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); + + write!(f, "{}", git_url_str) + } +} + +impl FromStr for GitUrl { + //type Err = GitUrlParseError; + type Err = GitUrlBuilderError; + + fn from_str(s: &str) -> Result { + GitUrl::parse(s) + } +} + +impl GitUrl { + /// Returns `GitUrl` after removing `user` and `token` values + /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info + pub fn trim_auth(&self) -> GitUrl { + let mut 
new_giturl = self.clone(); + new_giturl.set_user(None); + new_giturl.set_token(None); + new_giturl + } + + /// Returns a `Result` after normalizing and parsing `url` for metadata + pub fn parse(url: &str) -> Result { + let giturl = GitUrlBuilder::parse(url).unwrap(); + giturl.build() + //// Normalize the url so we can use Url crate to process ssh urls + //let normalized = normalize_url(url)?; + + //// Some pre-processing for paths + //let scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { + // scheme + //} else { + // return Err(GitUrlParseError::UnsupportedScheme( + // normalized.scheme().to_string(), + // )); + //}; + //if normalized.path().is_empty() { + // return Err(GitUrlParseError::EmptyPath); + //} + + //// Normalized ssh urls can always have their first '/' removed + //let urlpath = match &scheme { + // Scheme::Ssh => { + // // At the moment, we're relying on url::Url's parse() behavior to not duplicate + // // the leading '/' when we normalize + // normalized.path()[1..].to_string() + // } + // _ => normalized.path().to_string(), + //}; + + //let git_suffix_check = &urlpath.ends_with(".git"); + + //// Parse through path for name,owner,organization + //// Support organizations for Azure Devops + //#[cfg(feature = "tracing")] + //debug!("The urlpath: {:?}", &urlpath); + + //// Most git services use the path for metadata in the same way, so we're going to separate + //// the metadata + //// ex. github.com/accountname/reponame + //// owner = accountname + //// name = reponame + //// + //// organizations are going to be supported on a per-host basis + //let splitpath = &urlpath.rsplit_terminator('/').collect::>(); + + //#[cfg(feature = "tracing")] + //debug!("rsplit results for metadata: {:?}", splitpath); + + //let name = splitpath[0].trim_end_matches(".git").to_string(); + + //// TODO: I think here is where we want to update the url pattern identification step.. 
I want to be able to have a hint that the user can pass + + //let (owner, organization, fullname) = match &scheme { + // // We're not going to assume anything about metadata from a filepath + // Scheme::File => (None::, None::, name.clone()), + // _ => { + // let mut fullname: Vec<&str> = Vec::new(); + + // // TODO: Add support for parsing out orgs from these urls + // let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; + // //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; + + // let host_str = if let Some(host) = normalized.host_str() { + // host + // } else { + // return Err(GitUrlParseError::UnsupportedUrlHostFormat); + // }; + + // match hosts_w_organization_in_path.contains(&host_str) { + // true => { + // #[cfg(feature = "tracing")] + // debug!("Found a git provider with an org"); + + // // The path differs between git:// and https:// schemes + + // match &scheme { + // // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", + // Scheme::Ssh => { + // // Organization + // fullname.push(splitpath[2]); + // // Project/Owner name + // fullname.push(splitpath[1]); + // // Repo name + // fullname.push(splitpath[0]); + + // ( + // Some(splitpath[1].to_string()), + // Some(splitpath[2].to_string()), + // fullname.join("/"), + // ) + // } + // // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", + // Scheme::Https => { + // // Organization + // fullname.push(splitpath[3]); + // // Project/Owner name + // fullname.push(splitpath[2]); + // // Repo name + // fullname.push(splitpath[0]); + + // ( + // Some(splitpath[2].to_string()), + // Some(splitpath[3].to_string()), + // fullname.join("/"), + // ) + // } + + // // TODO: I'm not sure if I want to support throwing this error long-term + // _ => return Err(GitUrlParseError::UnexpectedScheme), + // } + // } + // false => { + // if !url.starts_with("ssh") && splitpath.len() < 2 { + // return Err(GitUrlParseError::UnexpectedFormat); + 
// } + + // let position = match splitpath.len() { + // 0 => return Err(GitUrlParseError::UnexpectedFormat), + // 1 => 0, + // _ => 1, + // }; + + // // push owner + // fullname.push(splitpath[position]); + // // push name + // fullname.push(name.as_str()); + + // ( + // Some(splitpath[position].to_string()), + // None::, + // fullname.join("/"), + // ) + // } + // } + // } + //}; + + //let final_host = match scheme { + // Scheme::File => None, + // _ => normalized.host_str().map(|h| h.to_string()), + //}; + + //let final_path = match scheme { + // Scheme::File => { + // if let Some(host) = normalized.host_str() { + // format!("{}{}", host, urlpath) + // } else { + // urlpath + // } + // } + // _ => urlpath, + //}; + + //Ok(GitUrl { + // host: final_host, + // name, + // owner, + // organization, + // fullname, + // scheme, + // user: match normalized.username().to_string().len() { + // 0 => None, + // _ => Some(normalized.username().to_string()), + // }, + // token: normalized.password().map(|p| p.to_string()), + // port: normalized.port(), + // path: final_path, + // git_suffix: *git_suffix_check, + // scheme_prefix: url.contains("://") || url.starts_with("git:"), + //}) + } +} + +/// `normalize_ssh_url` takes in an ssh url that separates the login info +/// from the path into with a `:` and replaces it with `/`. 
+/// +/// Prepends `ssh://` to url +/// +/// Supports absolute and relative paths +//fn normalize_ssh_url(url: &str) -> Result { +// let u = url.split(':').collect::>(); +// +// match u.len() { +// 2 => { +// #[cfg(feature = "tracing")] +// debug!("Normalizing ssh url: {:?}", u); +// normalize_url(&format!("ssh://{}/{}", u[0], u[1])) +// } +// 3 => { +// #[cfg(feature = "tracing")] +// debug!("Normalizing ssh url with ports: {:?}", u); +// normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2])) +// } +// _default => Err(GitUrlParseError::UnsupportedSshUrlFormat), +// } +//} + +/// `normalize_file_path` takes in a filepath and uses `Url::from_file_path()` to parse +/// +/// Prepends `file://` to url +//#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] +//fn normalize_file_path(filepath: &str) -> Result { +// let fp = Url::from_file_path(filepath); +// +// match fp { +// Ok(path) => Ok(path), +// Err(_e) => { +// if let Ok(file_url) = normalize_url(&format!("file://{}", filepath)) { +// Ok(file_url) +// } else { +// Err(GitUrlParseError::FileUrlNormalizeFailedSchemeAdded) +// } +// } +// } +//} + +#[cfg(target_arch = "wasm32")] +fn normalize_file_path(_filepath: &str) -> Result { + unreachable!() +} + +///// `normalize_url` takes in url as `&str` and takes an opinionated approach to identify +///// `ssh://` or `file://` urls that require more information to be added so that +///// they can be parsed more effectively by `url::Url::parse()` +//pub fn normalize_url(url: &str) -> Result { +// #[cfg(feature = "tracing")] +// debug!("Processing: {:?}", &url); +// +// // TODO: Should this be extended to check for any whitespace? 
+// // Error if there are null bytes within the url +// // https://github.com/tjtelan/git-url-parse-rs/issues/16 +// if url.contains('\0') { +// return Err(GitUrlParseError::FoundNullBytes); +// } +// +// // We're going to remove any trailing slash before running through Url::parse +// let trim_url = url.trim_end_matches('/'); +// +// // TODO: Remove support for this form when I go to next major version. +// // I forget what it supports, and it isn't obvious after searching for examples +// // normalize short git url notation: git:host/path +// let url_to_parse = if trim_url.starts_with("git:") && !trim_url.starts_with("git://") { +// trim_url.replace("git:", "git://") +// } else { +// trim_url.to_string() +// }; +// +// let url_parse = Url::parse(&url_to_parse); +// +// Ok(match url_parse { +// Ok(u) => { +// match Scheme::from_str(u.scheme()) { +// Ok(_p) => u, +// Err(_e) => { +// // Catch case when an ssh url is given w/o a user +// #[cfg(feature = "tracing")] +// debug!("Scheme parse fail. Assuming a userless ssh url"); +// if let Ok(ssh_url) = normalize_ssh_url(trim_url) { +// ssh_url +// } else { +// return Err(GitUrlParseError::SshUrlNormalizeFailedNoScheme); +// } +// } +// } +// } +// +// // If we're here, we're only looking for Scheme::Ssh or Scheme::File +// // TODO: Add test for this +// Err(url::ParseError::RelativeUrlWithoutBase) => { +// // Assuming we have found Scheme::Ssh if we can find an "@" before ":" +// // Otherwise we have Scheme::File +// //let re = Regex::new(r"^\S+(@)\S+(:).*$").with_context(|| { +// // "Failed to build ssh git url regex for testing against url".to_string() +// //})?; +// +// match is_ssh_url(trim_url) { +// true => { +// #[cfg(feature = "tracing")] +// debug!("Scheme::SSH match for normalization"); +// normalize_ssh_url(trim_url)? +// } +// false => { +// #[cfg(feature = "tracing")] +// debug!("Scheme::File match for normalization"); +// normalize_file_path(trim_url)? 
+// } +// } +// } +// Err(err) => { +// return Err(GitUrlParseError::from(err)); +// } +// }) +//} + +// Valid ssh `url` for cloning have a usernames, +// but we don't require it classification or parsing purposes +// However a path must be specified with a `:` +//fn is_ssh_url(url: &str) -> bool { +// // if we do not have a path +// if !url.contains(':') { +// return false; +// } +// +// // if we have a username, expect it before the path (Are usernames with colons valid?) +// if let (Some(at_pos), Some(colon_pos)) = (url.find('@'), url.find(':')) { +// if colon_pos < at_pos { +// return false; +// } +// +// // Make sure we provided a username, and not just `@` +// let parts: Vec<&str> = url.split('@').collect(); +// return parts.len() == 2 || parts[0].is_empty(); +// } +// +// // it's an ssh url if we have a domain:path pattern +// let parts: Vec<&str> = url.split(':').collect(); +// +// // FIXME: I am not sure how to validate a url with a port +// //if parts.len() != 3 && !parts[0].is_empty() && !parts[1].is_empty() && !parts[2].is_empty() { +// // return false; +// //} +// +// // This should also handle if a port is specified +// // no port example: ssh://user@domain:path/to/repo.git +// // port example: ssh://user@domain:port/path/to/repo.git +// parts.len() == 2 && parts[0].is_empty() && parts[1].is_empty() +//} diff --git a/tests/parse.rs b/tests/parse.rs index 8874d8d..0eddb17 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -4,11 +4,11 @@ fn ssh_user_ports() { let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Ssh)) - .host(Some(String::from("host.tld"))) - .user(Some(String::from("git"))) - .port(Some(9999)) - .path(String::from("user/project-name.git")) + .scheme(Scheme::Ssh) + .host("host.tld") + .user("git") + .port(9999 as u16) + .path("user/project-name.git") .print_scheme(true) .build() .unwrap(); @@ -21,10 
+21,9 @@ fn ssh_no_scheme_no_user() { let test_url = "host.tld:user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Ssh)) - .host(Some(String::from("host.tld"))) - .path(String::from("user/project-name.git")) - .print_scheme(false) + .scheme(Scheme::Ssh) + .host("host.tld") + .path("user/project-name.git") .build() .unwrap(); @@ -37,10 +36,10 @@ fn https_user_bitbucket() { let test_url = "https://user@bitbucket.org/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Https)) - .host(Some(String::from("bitbucket.org"))) - .user(Some(String::from("user"))) - .path(String::from("/user/repo.git")) + .scheme(Scheme::Https) + .host("bitbucket.org") + .user("user") + .path("/user/repo.git") .print_scheme(true) .build() .unwrap(); @@ -53,10 +52,10 @@ fn ssh_user_bitbucket() { let test_url = "git@bitbucket.org:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .host(Some(String::from("bitbucket.org"))) - .scheme(Some(Scheme::Ssh)) - .user(Some(String::from("git"))) - .path(String::from("user/repo.git")) + .scheme(Scheme::Ssh) + .host("bitbucket.org") + .user("git") + .path("user/repo.git") .build() .unwrap(); @@ -68,11 +67,11 @@ fn https_user_auth_bitbucket() { let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Https)) - .host(Some("bitbucket.org".to_string())) - .user(String::from("x-token-auth")) - .token(String::from("token")) - .path(String::from("/owner/name.git")) + .scheme(Scheme::Https) + .host("bitbucket.org") + .user("x-token-auth") + .token("token") + .path("/owner/name.git") .print_scheme(true) .build() .unwrap(); @@ -85,10 +84,10 @@ fn https_user_github() 
{ let test_url = "https://user@github.com/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Https)) - .user(Some(String::from("user"))) - .host(Some(String::from("github.com"))) - .path(String::from("/user/repo.git")) + .scheme(Scheme::Https) + .user("user") + .host("github.com") + .path("/user/repo.git") .print_scheme(true) .build() .unwrap(); @@ -101,10 +100,10 @@ fn ssh_user_github() { let test_url = "git@github.com:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Ssh)) - .user(Some(String::from("git"))) - .host(Some(String::from("github.com"))) - .path(String::from("user/repo.git")) + .scheme(Scheme::Ssh) + .user("git") + .host("github.com") + .path("user/repo.git") .build() .unwrap(); @@ -116,11 +115,11 @@ fn https_user_auth_github() { let test_url = "https://token:x-oauth-basic@github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Https)) - .user(Some(String::from("token"))) - .token(Some(String::from("x-oauth-basic"))) - .host(Some(String::from("github.com"))) - .path(String::from("/owner/name.git")) + .scheme(Scheme::Https) + .user("token") + .token("x-oauth-basic") + .host("github.com") + .path("/owner/name.git") .print_scheme(true) .build() .unwrap(); @@ -133,10 +132,10 @@ fn ssh_user_azure_devops() { let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Ssh)) - .user(Some(String::from("git"))) - .host(Some(String::from("ssh.dev.azure.com"))) - .path(String::from("v3/CompanyName/ProjectName/RepoName")) + .scheme(Scheme::Ssh) + .user("git") + .host("ssh.dev.azure.com") + .path("v3/CompanyName/ProjectName/RepoName") .build() 
.unwrap(); @@ -148,10 +147,10 @@ fn https_user_azure_devops() { let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Https)) - .user(Some(String::from("organization"))) - .host(Some(String::from("dev.azure.com"))) - .path(String::from("/organization/project/_git/repo")) + .scheme(Scheme::Https) + .user("organization") + .host("dev.azure.com") + .path("/organization/project/_git/repo") .print_scheme(true) .build() .unwrap(); @@ -164,10 +163,10 @@ fn ftp_user() { let test_url = "ftp://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Ftp)) - .user(Some(String::from("git"))) - .host(Some(String::from("host.tld"))) - .path(String::from("/user/project-name.git")) + .scheme(Scheme::Ftp) + .user("git") + .host("host.tld") + .path("/user/project-name.git") .print_scheme(true) .build() .unwrap(); @@ -180,10 +179,10 @@ fn ftps_user() { let test_url = "ftps://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Ftps)) - .user(Some(String::from("git"))) - .host(Some(String::from("host.tld"))) - .path(String::from("/user/project-name.git")) + .scheme(Scheme::Ftps) + .user("git") + .host("host.tld") + .path("/user/project-name.git") .print_scheme(true) .build() .unwrap(); @@ -196,8 +195,8 @@ fn relative_unix_path() { let test_url = "../project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::File)) - .path(String::from("../project-name.git")) + .scheme(Scheme::File) + .path("../project-name.git") .build() .unwrap(); @@ -209,8 +208,8 @@ fn absolute_unix_path() { let test_url = "/path/to/project-name.git"; let 
parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::File)) - .path(String::from("/path/to/project-name.git")) + .scheme(Scheme::File) + .path("/path/to/project-name.git") .build() .unwrap(); @@ -223,8 +222,8 @@ fn relative_windows_path() { let test_url = "..\\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::File)) - .path(String::from("../project-name.git")) + .scheme(Scheme::File) + .path("../project-name.git") .build() .unwrap(); @@ -239,8 +238,8 @@ fn absolute_windows_path() { let test_url = "c:\\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::File)) - .path(String::from("c:\\project-name.git")) + .scheme(Scheme::File) + .path("c:\\project-name.git") .build() .unwrap(); @@ -310,9 +309,9 @@ fn git() { let test_url = "git://github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Some(Scheme::Git)) - .host(Some(String::from("github.com"))) - .path(String::from("/owner/name.git")) + .scheme(Scheme::Git) + .host("github.com") + .path("/owner/name.git") .print_scheme(true) .build() .unwrap(); From d96eca7618aaccc1d6f887f44ec14dd581908766 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 22 Aug 2025 17:30:49 -0700 Subject: [PATCH 08/32] Add provider --- src/lib.rs | 5 +- src/provider/mod.rs | 3 - src/types/error.rs | 12 +- src/types/mod.rs | 368 +++++++++++++++++++------------------- src/types/provider/mod.rs | 34 ++++ tests/mod.rs | 1 + tests/provider.rs | 12 ++ 7 files changed, 243 insertions(+), 192 deletions(-) delete mode 100644 src/provider/mod.rs create mode 100644 src/types/provider/mod.rs create mode 100644 tests/provider.rs diff --git a/src/lib.rs b/src/lib.rs index 3f789d8..6e8f520 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ -mod provider; mod types; -pub use types::{GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, Scheme}; +pub use types::{ + GenericProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, Scheme, +}; #[cfg(feature = "tracing")] use tracing::debug; diff --git a/src/provider/mod.rs b/src/provider/mod.rs deleted file mode 100644 index db356dd..0000000 --- a/src/provider/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -// generic -// gitlab (subgroups) style -// azure devops diff --git a/src/types/error.rs b/src/types/error.rs index 81c7657..7559815 100644 --- a/src/types/error.rs +++ b/src/types/error.rs @@ -1,9 +1,10 @@ +use super::GitUrlBuilderError; use thiserror::Error; -#[derive(Error, Debug, PartialEq, Eq)] +#[derive(Error, Debug)] pub enum GitUrlParseError { - //#[error("Error from derive_builder")] - //DeriveBuilderError(#[from] derive_builder::UninitializedFieldError), + #[error("Error from derive_builder")] + DeriveBuilderError(#[from] GitUrlBuilderError), //#[error("Error from Url crate: {0}")] //UrlParseError(#[from] url::ParseError), @@ -22,9 +23,8 @@ pub enum GitUrlParseError { //#[error("No url scheme was found, then failed to normalize as file url after adding 'file://'")] //FileUrlNormalizeFailedSchemeAdded, - - //#[error("Git Url not in expected format")] - //UnexpectedFormat, + #[error("Git Url not in expected format")] + UnexpectedFormat, // FIXME: Keep an 
eye on this error for removal #[error("Git Url for host using unexpected scheme")] diff --git a/src/types/mod.rs b/src/types/mod.rs index dc7fe47..8a4ad2b 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,5 +1,8 @@ mod error; +mod provider; + pub use error::GitUrlParseError; +pub use provider::{GenericProvider, GitProvider}; use derive_builder::Builder; use getset::{Getters, Setters}; @@ -199,7 +202,6 @@ impl GitUrlBuilder { let scheme = Scheme::from_str(s).expect("Unknown scheme"); - // todo: pass in hint! *hint = match &scheme { Scheme::Ssh => GitUrlParseHint::Sshlike, Scheme::File => GitUrlParseHint::Filelike, @@ -255,21 +257,26 @@ impl GitUrlBuilder { builder.port(u16::from_str(port).expect("Not a valid port")); save = leftover; + // If we're currently uncertain, but we've found a port + // our guess is this more likely is an http url than an ssh url + // Add the `ssh://` scheme to the url if this is incorrect if *hint == GitUrlParseHint::Unknown { *hint = GitUrlParseHint::Httplike; } } } + // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports + if builder.port.is_none() && save.starts_with(":") { + *hint = GitUrlParseHint::Sshlike; + } + *self = builder; *working_url = save; } fn parse_ssh_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { let mut builder = self.clone(); - // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports - //if let Some(ssh_path) = working_url.strip_prefix(":") { - //} if let Ok((_leftover, Some(path))) = GitUrlBuilder::_parse_ssh_path(working_url) { builder.scheme(Scheme::Ssh); @@ -341,11 +348,6 @@ impl GitUrlBuilder { /// Build the printable GitUrl from its components impl fmt::Display for GitUrl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - //let scheme_prefix = match self.scheme_prefix { - // true => format!("{}://", self.scheme), - // false => String::new(), - //}; - let scheme = if let Some(scheme) = 
&self.scheme() && self.print_scheme().clone() { @@ -354,12 +356,6 @@ impl fmt::Display for GitUrl { String::new() }; - //let scheme_prefix = if self.print_scheme && self.scheme_prefix { - // format!("{}://", self.scheme) - //} else { - // String::new() - //}; - let auth_info = match self.scheme() { Some(Scheme::Ssh) | Some(Scheme::Git) | Some(Scheme::GitSsh) => { if let Some(user) = &self.user() { @@ -430,173 +426,183 @@ impl GitUrl { pub fn parse(url: &str) -> Result { let giturl = GitUrlBuilder::parse(url).unwrap(); giturl.build() - //// Normalize the url so we can use Url crate to process ssh urls - //let normalized = normalize_url(url)?; - - //// Some pre-processing for paths - //let scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { - // scheme - //} else { - // return Err(GitUrlParseError::UnsupportedScheme( - // normalized.scheme().to_string(), - // )); - //}; - //if normalized.path().is_empty() { - // return Err(GitUrlParseError::EmptyPath); - //} - - //// Normalized ssh urls can always have their first '/' removed - //let urlpath = match &scheme { - // Scheme::Ssh => { - // // At the moment, we're relying on url::Url's parse() behavior to not duplicate - // // the leading '/' when we normalize - // normalized.path()[1..].to_string() - // } - // _ => normalized.path().to_string(), - //}; - - //let git_suffix_check = &urlpath.ends_with(".git"); - - //// Parse through path for name,owner,organization - //// Support organizations for Azure Devops - //#[cfg(feature = "tracing")] - //debug!("The urlpath: {:?}", &urlpath); - - //// Most git services use the path for metadata in the same way, so we're going to separate - //// the metadata - //// ex. 
github.com/accountname/reponame - //// owner = accountname - //// name = reponame - //// - //// organizations are going to be supported on a per-host basis - //let splitpath = &urlpath.rsplit_terminator('/').collect::>(); - - //#[cfg(feature = "tracing")] - //debug!("rsplit results for metadata: {:?}", splitpath); - - //let name = splitpath[0].trim_end_matches(".git").to_string(); - - //// TODO: I think here is where we want to update the url pattern identification step.. I want to be able to have a hint that the user can pass - - //let (owner, organization, fullname) = match &scheme { - // // We're not going to assume anything about metadata from a filepath - // Scheme::File => (None::, None::, name.clone()), - // _ => { - // let mut fullname: Vec<&str> = Vec::new(); - - // // TODO: Add support for parsing out orgs from these urls - // let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; - // //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; - - // let host_str = if let Some(host) = normalized.host_str() { - // host - // } else { - // return Err(GitUrlParseError::UnsupportedUrlHostFormat); - // }; - - // match hosts_w_organization_in_path.contains(&host_str) { - // true => { - // #[cfg(feature = "tracing")] - // debug!("Found a git provider with an org"); - - // // The path differs between git:// and https:// schemes - - // match &scheme { - // // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", - // Scheme::Ssh => { - // // Organization - // fullname.push(splitpath[2]); - // // Project/Owner name - // fullname.push(splitpath[1]); - // // Repo name - // fullname.push(splitpath[0]); - - // ( - // Some(splitpath[1].to_string()), - // Some(splitpath[2].to_string()), - // fullname.join("/"), - // ) - // } - // // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", - // Scheme::Https => { - // // Organization - // fullname.push(splitpath[3]); - // // Project/Owner name - // 
fullname.push(splitpath[2]); - // // Repo name - // fullname.push(splitpath[0]); - - // ( - // Some(splitpath[2].to_string()), - // Some(splitpath[3].to_string()), - // fullname.join("/"), - // ) - // } - - // // TODO: I'm not sure if I want to support throwing this error long-term - // _ => return Err(GitUrlParseError::UnexpectedScheme), - // } - // } - // false => { - // if !url.starts_with("ssh") && splitpath.len() < 2 { - // return Err(GitUrlParseError::UnexpectedFormat); - // } - - // let position = match splitpath.len() { - // 0 => return Err(GitUrlParseError::UnexpectedFormat), - // 1 => 0, - // _ => 1, - // }; - - // // push owner - // fullname.push(splitpath[position]); - // // push name - // fullname.push(name.as_str()); - - // ( - // Some(splitpath[position].to_string()), - // None::, - // fullname.join("/"), - // ) - // } - // } - // } - //}; - - //let final_host = match scheme { - // Scheme::File => None, - // _ => normalized.host_str().map(|h| h.to_string()), - //}; - - //let final_path = match scheme { - // Scheme::File => { - // if let Some(host) = normalized.host_str() { - // format!("{}{}", host, urlpath) - // } else { - // urlpath - // } - // } - // _ => urlpath, - //}; - - //Ok(GitUrl { - // host: final_host, - // name, - // owner, - // organization, - // fullname, - // scheme, - // user: match normalized.username().to_string().len() { - // 0 => None, - // _ => Some(normalized.username().to_string()), - // }, - // token: normalized.password().map(|p| p.to_string()), - // port: normalized.port(), - // path: final_path, - // git_suffix: *git_suffix_check, - // scheme_prefix: url.contains("://") || url.starts_with("git:"), - //}) } + + pub fn provider_info(&self) -> Result + where + T: GitProvider, + { + let g = T::from_git_url(&self); + Ok(g) + //Err(GitUrlParseError::UnexpectedFormat) + } + + //// Normalize the url so we can use Url crate to process ssh urls + //let normalized = normalize_url(url)?; + + //// Some pre-processing for paths + //let 
scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { + // scheme + //} else { + // return Err(GitUrlParseError::UnsupportedScheme( + // normalized.scheme().to_string(), + // )); + //}; + //if normalized.path().is_empty() { + // return Err(GitUrlParseError::EmptyPath); + //} + + //// Normalized ssh urls can always have their first '/' removed + //let urlpath = match &scheme { + // Scheme::Ssh => { + // // At the moment, we're relying on url::Url's parse() behavior to not duplicate + // // the leading '/' when we normalize + // normalized.path()[1..].to_string() + // } + // _ => normalized.path().to_string(), + //}; + + //let git_suffix_check = &urlpath.ends_with(".git"); + + //// Parse through path for name,owner,organization + //// Support organizations for Azure Devops + //#[cfg(feature = "tracing")] + //debug!("The urlpath: {:?}", &urlpath); + + //// Most git services use the path for metadata in the same way, so we're going to separate + //// the metadata + //// ex. github.com/accountname/reponame + //// owner = accountname + //// name = reponame + //// + //// organizations are going to be supported on a per-host basis + //let splitpath = &urlpath.rsplit_terminator('/').collect::>(); + + //#[cfg(feature = "tracing")] + //debug!("rsplit results for metadata: {:?}", splitpath); + + //let name = splitpath[0].trim_end_matches(".git").to_string(); + + //// TODO: I think here is where we want to update the url pattern identification step.. 
I want to be able to have a hint that the user can pass + + //let (owner, organization, fullname) = match &scheme { + // // We're not going to assume anything about metadata from a filepath + // Scheme::File => (None::, None::, name.clone()), + // _ => { + // let mut fullname: Vec<&str> = Vec::new(); + + // // TODO: Add support for parsing out orgs from these urls + // let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; + // //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; + + // let host_str = if let Some(host) = normalized.host_str() { + // host + // } else { + // return Err(GitUrlParseError::UnsupportedUrlHostFormat); + // }; + + // match hosts_w_organization_in_path.contains(&host_str) { + // true => { + // #[cfg(feature = "tracing")] + // debug!("Found a git provider with an org"); + + // // The path differs between git:// and https:// schemes + + // match &scheme { + // // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", + // Scheme::Ssh => { + // // Organization + // fullname.push(splitpath[2]); + // // Project/Owner name + // fullname.push(splitpath[1]); + // // Repo name + // fullname.push(splitpath[0]); + + // ( + // Some(splitpath[1].to_string()), + // Some(splitpath[2].to_string()), + // fullname.join("/"), + // ) + // } + // // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", + // Scheme::Https => { + // // Organization + // fullname.push(splitpath[3]); + // // Project/Owner name + // fullname.push(splitpath[2]); + // // Repo name + // fullname.push(splitpath[0]); + + // ( + // Some(splitpath[2].to_string()), + // Some(splitpath[3].to_string()), + // fullname.join("/"), + // ) + // } + + // // TODO: I'm not sure if I want to support throwing this error long-term + // _ => return Err(GitUrlParseError::UnexpectedScheme), + // } + // } + // false => { + // if !url.starts_with("ssh") && splitpath.len() < 2 { + // return Err(GitUrlParseError::UnexpectedFormat); + 
// } + + // let position = match splitpath.len() { + // 0 => return Err(GitUrlParseError::UnexpectedFormat), + // 1 => 0, + // _ => 1, + // }; + + // // push owner + // fullname.push(splitpath[position]); + // // push name + // fullname.push(name.as_str()); + + // ( + // Some(splitpath[position].to_string()), + // None::, + // fullname.join("/"), + // ) + // } + // } + // } + //}; + + //let final_host = match scheme { + // Scheme::File => None, + // _ => normalized.host_str().map(|h| h.to_string()), + //}; + + //let final_path = match scheme { + // Scheme::File => { + // if let Some(host) = normalized.host_str() { + // format!("{}{}", host, urlpath) + // } else { + // urlpath + // } + // } + // _ => urlpath, + //}; + + //Ok(GitUrl { + // host: final_host, + // name, + // owner, + // organization, + // fullname, + // scheme, + // user: match normalized.username().to_string().len() { + // 0 => None, + // _ => Some(normalized.username().to_string()), + // }, + // token: normalized.password().map(|p| p.to_string()), + // port: normalized.port(), + // path: final_path, + // git_suffix: *git_suffix_check, + // scheme_prefix: url.contains("://") || url.starts_with("git:"), + //}) } /// `normalize_ssh_url` takes in an ssh url that separates the login info diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs new file mode 100644 index 0000000..6b6c327 --- /dev/null +++ b/src/types/provider/mod.rs @@ -0,0 +1,34 @@ +// generic +// gitlab (subgroups) style +// azure devops + +use crate::{GitUrl, GitUrlParseError}; + +pub trait GitProvider: Clone + std::fmt::Debug { + fn from_git_url(url: &GitUrl) -> Self + where + Self: Sized; + //fn get_url(&self, provider: &str + //fn register(&self); + //fn unregister(&self);o + fn to_obj(&self) -> Box { + Box::new(self.clone()) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct GenericProvider { + host: String, + user: String, + repo: String, +} + +impl GitProvider for GenericProvider { + fn 
from_git_url(url: &GitUrl) -> Self { + GenericProvider { + host: String::from(""), + user: String::from(""), + repo: String::from(""), + } + } +} diff --git a/tests/mod.rs b/tests/mod.rs index b265223..5f6f8f9 100644 --- a/tests/mod.rs +++ b/tests/mod.rs @@ -1,3 +1,4 @@ //mod normalize; mod parse; +mod provider; mod trim_auth; diff --git a/tests/provider.rs b/tests/provider.rs new file mode 100644 index 0000000..eda19d2 --- /dev/null +++ b/tests/provider.rs @@ -0,0 +1,12 @@ +use git_url_parse::*; + +#[test] +fn generic_git() { + let test_url = "ssh://git@host.tld:9999/user/project-name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info = parsed.provider_info::().unwrap(); + let expected = GenericProvider::default(); + assert_eq!(provider_info, expected) + //let provider = parsed +} From b8744b5a9836eba8f3b13d1873d49c855d77612c Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Sat, 23 Aug 2025 23:20:46 -0700 Subject: [PATCH 09/32] passing tests for generic provider --- src/types/mod.rs | 3 +-- src/types/provider/mod.rs | 42 +++++++++++++++++++++++++++++---------- tests/provider.rs | 25 +++++++++++++++++++---- 3 files changed, 53 insertions(+), 17 deletions(-) diff --git a/src/types/mod.rs b/src/types/mod.rs index 8a4ad2b..89ea884 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -432,8 +432,7 @@ impl GitUrl { where T: GitProvider, { - let g = T::from_git_url(&self); - Ok(g) + T::from_git_url(&self) //Err(GitUrlParseError::UnexpectedFormat) } diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 6b6c327..d5de35c 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -2,12 +2,18 @@ // gitlab (subgroups) style // azure devops +use nom::branch::alt; +use nom::bytes::complete::{is_not, tag, take_till, take_until, take_while}; +use nom::character::complete::{alphanumeric1, anychar, one_of}; +use nom::combinator::recognize; +use nom::multi::many0; +use nom::sequence::{preceded, 
separated_pair, terminated}; +use nom::{IResult, Parser, combinator::opt, combinator::rest}; + use crate::{GitUrl, GitUrlParseError}; pub trait GitProvider: Clone + std::fmt::Debug { - fn from_git_url(url: &GitUrl) -> Self - where - Self: Sized; + fn from_git_url(url: &GitUrl) -> Result; //fn get_url(&self, provider: &str //fn register(&self); //fn unregister(&self);o @@ -16,19 +22,33 @@ pub trait GitProvider: Clone + std::fmt::Debug { } } +// todo: builder #[derive(Clone, Debug, PartialEq, Eq, Default)] pub struct GenericProvider { - host: String, - user: String, - repo: String, + pub host: String, + pub user: String, + pub repo: String, +} +impl GenericProvider { + fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { + let (n, _) = opt(tag("/")).parse(input)?; + opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) + } } impl GitProvider for GenericProvider { - fn from_git_url(url: &GitUrl) -> Self { - GenericProvider { - host: String::from(""), - user: String::from(""), - repo: String::from(""), + fn from_git_url(url: &GitUrl) -> Result { + if let (Ok((_, Some((user, repo)))), Some(host)) = + (GenericProvider::_get_user_repo(url.path()), url.host()) + { + Ok(GenericProvider { + host: host.clone(), + user: String::from(user), + repo: String::from(repo), + }) + } else { + // TODO: Check this error type later + Err(GitUrlParseError::UnexpectedFormat) } } } diff --git a/tests/provider.rs b/tests/provider.rs index eda19d2..1d4f9aa 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -1,12 +1,29 @@ use git_url_parse::*; #[test] -fn generic_git() { - let test_url = "ssh://git@host.tld:9999/user/project-name.git"; +fn http_generic_git() { + let test_url = "https://github.com/tjtelan/git-url-parse-rs.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let provider_info = parsed.provider_info::().unwrap(); - let expected = GenericProvider::default(); + let expected = GenericProvider { + host: "github.com".to_string(), + user: 
"tjtelan".to_string(), + repo: "git-url-parse-rs.git".to_string(), + }; + assert_eq!(provider_info, expected) +} + +#[test] +fn ssh_generic_git() { + let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info = parsed.provider_info::().unwrap(); + let expected = GenericProvider { + host: "github.com".to_string(), + user: "tjtelan".to_string(), + repo: "git-url-parse-rs.git".to_string(), + }; assert_eq!(provider_info, expected) - //let provider = parsed } From 6ba1c8f80b571eea4a0aa7b978c2410c42ca7af5 Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Tue, 26 Aug 2025 20:17:08 -0700 Subject: [PATCH 10/32] Default and custom provider parsing and test --- src/lib.rs | 3 ++- src/types/mod.rs | 26 ++++++++++++++--------- src/types/provider/mod.rs | 14 +++++++------ tests/provider.rs | 43 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 67 insertions(+), 19 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6e8f520..9963cff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ mod types; pub use types::{ - GenericProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, Scheme, + GenericProvider, GitProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, + Scheme, }; #[cfg(feature = "tracing")] diff --git a/src/types/mod.rs b/src/types/mod.rs index 89ea884..146e6e4 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -63,7 +63,10 @@ pub(crate) enum GitUrlParseHint { #[derive(Debug, PartialEq, Eq, Clone, Builder, Default, Getters, Setters)] #[builder(build_fn(validate = "Self::prebuild_check"), field(public))] #[get = "pub"] -pub struct GitUrl { +pub struct GitUrl

+where + P: GitProvider, +{ /// The fully qualified domain name (FQDN) or IP of the repo #[builder(setter(into, strip_option), default)] host: Option, @@ -98,9 +101,12 @@ pub struct GitUrl { //pub scheme_prefix: bool, #[builder(default)] print_scheme: bool, + + #[builder(setter(into, strip_option), default)] + provider: Option

, } -impl GitUrlBuilder { +impl> GitUrlBuilder

{ pub fn trim_auth(&mut self) { self.user = None; self.token = None; @@ -197,7 +203,7 @@ impl GitUrlBuilder { fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { let mut builder = self.clone(); - if let Ok((leftover, Some(s))) = GitUrlBuilder::_parse_scheme(working_url) { + if let Ok((leftover, Some(s))) = GitUrlBuilder::

::_parse_scheme(working_url) { println!("leftover: {leftover}, scheme: {s:?}"); let scheme = Scheme::from_str(s).expect("Unknown scheme"); @@ -218,7 +224,7 @@ impl GitUrlBuilder { fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { let mut builder = self.clone(); - if let Ok((leftover, Some(username))) = GitUrlBuilder::_parse_username(working_url) { + if let Ok((leftover, Some(username))) = GitUrlBuilder::

::_parse_username(working_url) { println!("leftover: {leftover}, username: {username:?}"); builder.user(username); @@ -226,7 +232,7 @@ impl GitUrlBuilder { *hint = GitUrlParseHint::Sshlike; } - if let Ok((token, Some(real_username))) = GitUrlBuilder::_parse_token(username) { + if let Ok((token, Some(real_username))) = GitUrlBuilder::

::_parse_token(username) { println!("token: {token}, real_username: {real_username:?}"); builder.user(real_username); builder.token(token); @@ -245,13 +251,13 @@ impl GitUrlBuilder { let mut builder = self.clone(); let mut save = working_url.clone(); - if let Ok((leftover, Some(hostname))) = GitUrlBuilder::_parse_hostname(save) { + if let Ok((leftover, Some(hostname))) = GitUrlBuilder::

::_parse_hostname(save) { println!("leftover {leftover}, hostname: {hostname}"); builder.host(hostname); save = leftover; } - if let Ok((leftover, Some(port))) = GitUrlBuilder::_parse_port(save) { + if let Ok((leftover, Some(port))) = GitUrlBuilder::

::_parse_port(save) { if !port.is_empty() { println!("leftover {leftover}, port: {port}"); builder.port(u16::from_str(port).expect("Not a valid port")); @@ -278,7 +284,7 @@ impl GitUrlBuilder { fn parse_ssh_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { let mut builder = self.clone(); - if let Ok((_leftover, Some(path))) = GitUrlBuilder::_parse_ssh_path(working_url) { + if let Ok((_leftover, Some(path))) = GitUrlBuilder::

::_parse_ssh_path(working_url) { builder.scheme(Scheme::Ssh); *self = builder; @@ -288,7 +294,7 @@ impl GitUrlBuilder { fn parse_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { let mut builder = self.clone(); - if let Ok((leftover, path)) = GitUrlBuilder::_parse_path(working_url) { + if let Ok((leftover, path)) = GitUrlBuilder::

::_parse_path(working_url) { println!("leftover {leftover}, path: {path}"); builder.path(path); @@ -430,7 +436,7 @@ impl GitUrl { pub fn provider_info(&self) -> Result where - T: GitProvider, + T: GitProvider, { T::from_git_url(&self) //Err(GitUrlParseError::UnexpectedFormat) diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index d5de35c..a4ae4fa 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -12,14 +12,14 @@ use nom::{IResult, Parser, combinator::opt, combinator::rest}; use crate::{GitUrl, GitUrlParseError}; -pub trait GitProvider: Clone + std::fmt::Debug { - fn from_git_url(url: &GitUrl) -> Result; +pub trait GitProvider: Clone + std::fmt::Debug { + fn from_git_url(url: &T) -> Result; //fn get_url(&self, provider: &str //fn register(&self); //fn unregister(&self);o - fn to_obj(&self) -> Box { - Box::new(self.clone()) - } + //fn to_obj(&self) -> Box { + // Box::new(self.clone()) + //} } // todo: builder @@ -34,9 +34,11 @@ impl GenericProvider { let (n, _) = opt(tag("/")).parse(input)?; opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) } + + // fn _get_path_segment } -impl GitProvider for GenericProvider { +impl GitProvider for GenericProvider { fn from_git_url(url: &GitUrl) -> Result { if let (Ok((_, Some((user, repo)))), Some(host)) = (GenericProvider::_get_user_repo(url.path()), url.host()) diff --git a/tests/provider.rs b/tests/provider.rs index 1d4f9aa..178c834 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -5,7 +5,7 @@ fn http_generic_git() { let test_url = "https://github.com/tjtelan/git-url-parse-rs.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info = parsed.provider_info::().unwrap(); + let provider_info: GenericProvider = parsed.provider_info().unwrap(); let expected = GenericProvider { host: "github.com".to_string(), user: "tjtelan".to_string(), @@ -19,7 +19,7 @@ fn ssh_generic_git() { let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; let 
parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info = parsed.provider_info::().unwrap(); + let provider_info: GenericProvider = parsed.provider_info().unwrap(); let expected = GenericProvider { host: "github.com".to_string(), user: "tjtelan".to_string(), @@ -27,3 +27,42 @@ fn ssh_generic_git() { }; assert_eq!(provider_info, expected) } + +#[test] +fn custom_provider() { + #[derive(Debug, Clone, PartialEq, Eq)] + struct TestProvider; + impl GitProvider for TestProvider { + fn from_git_url(_url: &GitUrl) -> Result { + Ok(Self) + } + } + + let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info: TestProvider = parsed.provider_info().unwrap(); + let expected = TestProvider; + assert_eq!(provider_info, expected) +} + +// Azure Devops +// https://learn.microsoft.com/en-us/azure/devops/repos/git/clone?view=azure-devops&tabs=visual-studio-2022 +// https://learn.microsoft.com/en-us/azure/devops/release-notes/2018/sep-10-azure-devops-launch#administration + +// GitHub +// https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository + +// GitLab +// https://docs.gitlab.com/topics/git/clone/#clone-with-ssh +// https://gitlab.com/explore/projects/trending?sort=latest_activity_desc +// https://gitlab.com/redhat/red-hat-ci-tools/kernel + +// BitBucket +// https://confluence.atlassian.com/bitbucketserver/clone-a-repository-790632786.html + +// SourceForge +// https://sourceforge.net/p/forge/documentation/Git/#h-how-to-clone-an-existing-repository + +// Codeberg +// https://codeberg.org/explore/repos From b5d5842501908379529f14e35754057fab44f8fa Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 29 Aug 2025 12:33:56 -0700 Subject: [PATCH 11/32] Remaining provider tests stubbed out --- Cargo.toml | 1 + src/lib.rs | 2 +- src/types/mod.rs | 326 +------------------------------------- src/types/provider/mod.rs | 94 +++++++++-- tests/mod.rs | 1 - tests/normalize.rs | 186 ---------------------- tests/provider.rs | 134 ++++++++++++++-- 7 files changed, 207 insertions(+), 537 deletions(-) delete mode 100644 tests/normalize.rs diff --git a/Cargo.toml b/Cargo.toml index 0d78ed1..18f6289 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ thiserror = "2" nom = "8" derive_builder = "0.20" getset = "0.1.6" +typed-path = "0.11.0" [dev-dependencies] env_logger = "0.11" diff --git a/src/lib.rs b/src/lib.rs index 9963cff..89d12e6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ mod types; pub use types::{ GenericProvider, GitProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, - Scheme, + Scheme, AzureDevOpsProvider, GitLabProvider, }; #[cfg(feature = "tracing")] diff --git a/src/types/mod.rs b/src/types/mod.rs index 146e6e4..30c8523 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -2,7 +2,7 @@ mod error; mod provider; pub use error::GitUrlParseError; -pub use provider::{GenericProvider, GitProvider}; +pub use provider::{GenericProvider, GitProvider, AzureDevOpsProvider, GitLabProvider}; use derive_builder::Builder; use getset::{Getters, Setters}; @@ -441,326 +441,4 @@ impl GitUrl { T::from_git_url(&self) //Err(GitUrlParseError::UnexpectedFormat) } - - //// Normalize the url so we can use Url crate to process ssh urls - //let normalized = normalize_url(url)?; - - //// Some pre-processing for paths - //let scheme = if let Ok(scheme) = Scheme::from_str(normalized.scheme()) { - // scheme - //} else { - // return Err(GitUrlParseError::UnsupportedScheme( - // normalized.scheme().to_string(), - // )); - //}; - //if normalized.path().is_empty() { - // return Err(GitUrlParseError::EmptyPath); - //} - - //// Normalized ssh 
urls can always have their first '/' removed - //let urlpath = match &scheme { - // Scheme::Ssh => { - // // At the moment, we're relying on url::Url's parse() behavior to not duplicate - // // the leading '/' when we normalize - // normalized.path()[1..].to_string() - // } - // _ => normalized.path().to_string(), - //}; - - //let git_suffix_check = &urlpath.ends_with(".git"); - - //// Parse through path for name,owner,organization - //// Support organizations for Azure Devops - //#[cfg(feature = "tracing")] - //debug!("The urlpath: {:?}", &urlpath); - - //// Most git services use the path for metadata in the same way, so we're going to separate - //// the metadata - //// ex. github.com/accountname/reponame - //// owner = accountname - //// name = reponame - //// - //// organizations are going to be supported on a per-host basis - //let splitpath = &urlpath.rsplit_terminator('/').collect::>(); - - //#[cfg(feature = "tracing")] - //debug!("rsplit results for metadata: {:?}", splitpath); - - //let name = splitpath[0].trim_end_matches(".git").to_string(); - - //// TODO: I think here is where we want to update the url pattern identification step.. 
I want to be able to have a hint that the user can pass - - //let (owner, organization, fullname) = match &scheme { - // // We're not going to assume anything about metadata from a filepath - // Scheme::File => (None::, None::, name.clone()), - // _ => { - // let mut fullname: Vec<&str> = Vec::new(); - - // // TODO: Add support for parsing out orgs from these urls - // let hosts_w_organization_in_path = ["dev.azure.com", "ssh.dev.azure.com"]; - // //vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; - - // let host_str = if let Some(host) = normalized.host_str() { - // host - // } else { - // return Err(GitUrlParseError::UnsupportedUrlHostFormat); - // }; - - // match hosts_w_organization_in_path.contains(&host_str) { - // true => { - // #[cfg(feature = "tracing")] - // debug!("Found a git provider with an org"); - - // // The path differs between git:// and https:// schemes - - // match &scheme { - // // Example: "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", - // Scheme::Ssh => { - // // Organization - // fullname.push(splitpath[2]); - // // Project/Owner name - // fullname.push(splitpath[1]); - // // Repo name - // fullname.push(splitpath[0]); - - // ( - // Some(splitpath[1].to_string()), - // Some(splitpath[2].to_string()), - // fullname.join("/"), - // ) - // } - // // Example: "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", - // Scheme::Https => { - // // Organization - // fullname.push(splitpath[3]); - // // Project/Owner name - // fullname.push(splitpath[2]); - // // Repo name - // fullname.push(splitpath[0]); - - // ( - // Some(splitpath[2].to_string()), - // Some(splitpath[3].to_string()), - // fullname.join("/"), - // ) - // } - - // // TODO: I'm not sure if I want to support throwing this error long-term - // _ => return Err(GitUrlParseError::UnexpectedScheme), - // } - // } - // false => { - // if !url.starts_with("ssh") && splitpath.len() < 2 { - // return Err(GitUrlParseError::UnexpectedFormat); - 
// } - - // let position = match splitpath.len() { - // 0 => return Err(GitUrlParseError::UnexpectedFormat), - // 1 => 0, - // _ => 1, - // }; - - // // push owner - // fullname.push(splitpath[position]); - // // push name - // fullname.push(name.as_str()); - - // ( - // Some(splitpath[position].to_string()), - // None::, - // fullname.join("/"), - // ) - // } - // } - // } - //}; - - //let final_host = match scheme { - // Scheme::File => None, - // _ => normalized.host_str().map(|h| h.to_string()), - //}; - - //let final_path = match scheme { - // Scheme::File => { - // if let Some(host) = normalized.host_str() { - // format!("{}{}", host, urlpath) - // } else { - // urlpath - // } - // } - // _ => urlpath, - //}; - - //Ok(GitUrl { - // host: final_host, - // name, - // owner, - // organization, - // fullname, - // scheme, - // user: match normalized.username().to_string().len() { - // 0 => None, - // _ => Some(normalized.username().to_string()), - // }, - // token: normalized.password().map(|p| p.to_string()), - // port: normalized.port(), - // path: final_path, - // git_suffix: *git_suffix_check, - // scheme_prefix: url.contains("://") || url.starts_with("git:"), - //}) -} - -/// `normalize_ssh_url` takes in an ssh url that separates the login info -/// from the path into with a `:` and replaces it with `/`. 
-/// -/// Prepends `ssh://` to url -/// -/// Supports absolute and relative paths -//fn normalize_ssh_url(url: &str) -> Result { -// let u = url.split(':').collect::>(); -// -// match u.len() { -// 2 => { -// #[cfg(feature = "tracing")] -// debug!("Normalizing ssh url: {:?}", u); -// normalize_url(&format!("ssh://{}/{}", u[0], u[1])) -// } -// 3 => { -// #[cfg(feature = "tracing")] -// debug!("Normalizing ssh url with ports: {:?}", u); -// normalize_url(&format!("ssh://{}:{}/{}", u[0], u[1], u[2])) -// } -// _default => Err(GitUrlParseError::UnsupportedSshUrlFormat), -// } -//} - -/// `normalize_file_path` takes in a filepath and uses `Url::from_file_path()` to parse -/// -/// Prepends `file://` to url -//#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] -//fn normalize_file_path(filepath: &str) -> Result { -// let fp = Url::from_file_path(filepath); -// -// match fp { -// Ok(path) => Ok(path), -// Err(_e) => { -// if let Ok(file_url) = normalize_url(&format!("file://{}", filepath)) { -// Ok(file_url) -// } else { -// Err(GitUrlParseError::FileUrlNormalizeFailedSchemeAdded) -// } -// } -// } -//} - -#[cfg(target_arch = "wasm32")] -fn normalize_file_path(_filepath: &str) -> Result { - unreachable!() -} - -///// `normalize_url` takes in url as `&str` and takes an opinionated approach to identify -///// `ssh://` or `file://` urls that require more information to be added so that -///// they can be parsed more effectively by `url::Url::parse()` -//pub fn normalize_url(url: &str) -> Result { -// #[cfg(feature = "tracing")] -// debug!("Processing: {:?}", &url); -// -// // TODO: Should this be extended to check for any whitespace? 
-// // Error if there are null bytes within the url -// // https://github.com/tjtelan/git-url-parse-rs/issues/16 -// if url.contains('\0') { -// return Err(GitUrlParseError::FoundNullBytes); -// } -// -// // We're going to remove any trailing slash before running through Url::parse -// let trim_url = url.trim_end_matches('/'); -// -// // TODO: Remove support for this form when I go to next major version. -// // I forget what it supports, and it isn't obvious after searching for examples -// // normalize short git url notation: git:host/path -// let url_to_parse = if trim_url.starts_with("git:") && !trim_url.starts_with("git://") { -// trim_url.replace("git:", "git://") -// } else { -// trim_url.to_string() -// }; -// -// let url_parse = Url::parse(&url_to_parse); -// -// Ok(match url_parse { -// Ok(u) => { -// match Scheme::from_str(u.scheme()) { -// Ok(_p) => u, -// Err(_e) => { -// // Catch case when an ssh url is given w/o a user -// #[cfg(feature = "tracing")] -// debug!("Scheme parse fail. Assuming a userless ssh url"); -// if let Ok(ssh_url) = normalize_ssh_url(trim_url) { -// ssh_url -// } else { -// return Err(GitUrlParseError::SshUrlNormalizeFailedNoScheme); -// } -// } -// } -// } -// -// // If we're here, we're only looking for Scheme::Ssh or Scheme::File -// // TODO: Add test for this -// Err(url::ParseError::RelativeUrlWithoutBase) => { -// // Assuming we have found Scheme::Ssh if we can find an "@" before ":" -// // Otherwise we have Scheme::File -// //let re = Regex::new(r"^\S+(@)\S+(:).*$").with_context(|| { -// // "Failed to build ssh git url regex for testing against url".to_string() -// //})?; -// -// match is_ssh_url(trim_url) { -// true => { -// #[cfg(feature = "tracing")] -// debug!("Scheme::SSH match for normalization"); -// normalize_ssh_url(trim_url)? -// } -// false => { -// #[cfg(feature = "tracing")] -// debug!("Scheme::File match for normalization"); -// normalize_file_path(trim_url)? 
-// } -// } -// } -// Err(err) => { -// return Err(GitUrlParseError::from(err)); -// } -// }) -//} - -// Valid ssh `url` for cloning have a usernames, -// but we don't require it classification or parsing purposes -// However a path must be specified with a `:` -//fn is_ssh_url(url: &str) -> bool { -// // if we do not have a path -// if !url.contains(':') { -// return false; -// } -// -// // if we have a username, expect it before the path (Are usernames with colons valid?) -// if let (Some(at_pos), Some(colon_pos)) = (url.find('@'), url.find(':')) { -// if colon_pos < at_pos { -// return false; -// } -// -// // Make sure we provided a username, and not just `@` -// let parts: Vec<&str> = url.split('@').collect(); -// return parts.len() == 2 || parts[0].is_empty(); -// } -// -// // it's an ssh url if we have a domain:path pattern -// let parts: Vec<&str> = url.split(':').collect(); -// -// // FIXME: I am not sure how to validate a url with a port -// //if parts.len() != 3 && !parts[0].is_empty() && !parts[1].is_empty() && !parts[2].is_empty() { -// // return false; -// //} -// -// // This should also handle if a port is specified -// // no port example: ssh://user@domain:path/to/repo.git -// // port example: ssh://user@domain:port/path/to/repo.git -// parts.len() == 2 && parts[0].is_empty() && parts[1].is_empty() -//} +} \ No newline at end of file diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index a4ae4fa..fe6e5b3 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -10,42 +10,42 @@ use nom::multi::many0; use nom::sequence::{preceded, separated_pair, terminated}; use nom::{IResult, Parser, combinator::opt, combinator::rest}; +use derive_builder::Builder; +use getset::{Getters, Setters}; + use crate::{GitUrl, GitUrlParseError}; pub trait GitProvider: Clone + std::fmt::Debug { fn from_git_url(url: &T) -> Result; - //fn get_url(&self, provider: &str - //fn register(&self); - //fn unregister(&self);o - //fn to_obj(&self) -> 
Box { - // Box::new(self.clone()) - //} } -// todo: builder -#[derive(Clone, Debug, PartialEq, Eq, Default)] +// todo: builder and setters be private? +#[derive(Debug, PartialEq, Eq, Clone, Builder, Default, Getters, Setters)] pub struct GenericProvider { pub host: String, - pub user: String, + pub owner: String, pub repo: String, } impl GenericProvider { - fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { + fn _get_owner_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { let (n, _) = opt(tag("/")).parse(input)?; opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) } - // fn _get_path_segment + // todo + pub fn fullname(&self) -> String { + format!("{}/{}", self.owner, self.repo) + } } impl GitProvider for GenericProvider { fn from_git_url(url: &GitUrl) -> Result { if let (Ok((_, Some((user, repo)))), Some(host)) = - (GenericProvider::_get_user_repo(url.path()), url.host()) + (GenericProvider::_get_owner_repo(url.path()), url.host()) { Ok(GenericProvider { host: host.clone(), - user: String::from(user), + owner: String::from(user), repo: String::from(repo), }) } else { @@ -54,3 +54,71 @@ impl GitProvider for GenericProvider { } } } + +// todo: builder, optional +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct AzureDevOpsProvider { + pub host: String, + pub org: String, + pub project: String, + pub repo: String, +} +impl AzureDevOpsProvider { + fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { + let (n, _) = opt(tag("/")).parse(input)?; + opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) + } + +} + +impl GitProvider for AzureDevOpsProvider { + fn from_git_url(url: &GitUrl) -> Result { + if let (Ok((_, Some((user, repo)))), Some(host)) = + (AzureDevOpsProvider::_get_user_repo(url.path()), url.host()) + { + Ok(AzureDevOpsProvider { + host: host.clone(), + org: String::from(""), + project: String::from(user), + repo: String::from(repo), + }) + } else { + // TODO: Check this error type later + 
Err(GitUrlParseError::UnexpectedFormat) + } + } +} + +// todo: builder, optional +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct GitLabProvider { + pub host: String, + pub user: String, + pub subgroup: Option>, + pub repo: String, +} +impl GitLabProvider { + fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { + let (n, _) = opt(tag("/")).parse(input)?; + opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) + } + +} + +impl GitProvider for GitLabProvider { + fn from_git_url(url: &GitUrl) -> Result { + if let (Ok((_, Some((user, repo)))), Some(host)) = + (GitLabProvider::_get_user_repo(url.path()), url.host()) + { + Ok(GitLabProvider { + host: host.clone(), + user: String::from(""), + subgroup: None, + repo: String::from(repo), + }) + } else { + // TODO: Check this error type later + Err(GitUrlParseError::UnexpectedFormat) + } + } +} \ No newline at end of file diff --git a/tests/mod.rs b/tests/mod.rs index 5f6f8f9..db47ff5 100644 --- a/tests/mod.rs +++ b/tests/mod.rs @@ -1,4 +1,3 @@ -//mod normalize; mod parse; mod provider; mod trim_auth; diff --git a/tests/normalize.rs b/tests/normalize.rs deleted file mode 100644 index 230dafb..0000000 --- a/tests/normalize.rs +++ /dev/null @@ -1,186 +0,0 @@ -//use git_url_parse::*; -// -//// Url Normalization -//#[test] -//fn git() { -// let test_url = "git://host.tld/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "git://host.tld/user/project-name.git"); -//} -// -//// I'm not even sure if this is a form that should be supported bc I can't find examples of it being used in the wild by another service -////#[should_panic] -//#[test] -//fn git2() { -// let test_url = "git:host.tld/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "git://host.tld/user/project-name.git"); -//} -// -//#[test] -//fn http() { -// 
let test_url = "http://host.tld/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "http://host.tld/user/project-name.git"); -//} -// -//#[test] -//fn https() { -// let test_url = "https://host.tld/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!( -// normalized.as_str(), -// "https://host.tld/user/project-name.git" -// ); -//} -// -//#[test] -//fn ssh_scheme() { -// let test_url = "ssh://git@host.tld/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!( -// normalized.as_str(), -// "ssh://git@host.tld/user/project-name.git" -// ); -//} -// -//#[test] -//fn ssh_no_scheme() { -// let test_url = "git@host.tld:user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!( -// normalized.as_str(), -// "ssh://git@host.tld/user/project-name.git" -// ); -//} -// -//#[test] -//fn ssh_no_scheme_no_user() { -// let test_url = "host.tld:user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "ssh://host.tld/user/project-name.git"); -//} -// -//#[test] -//fn unix_file_scheme_abs_path() { -// let test_url = "file:///user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "file:///user/project-name.git"); -//} -// -//#[test] -//fn unix_file_no_scheme_abs_path() { -// let test_url = "/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "file:///user/project-name.git"); -//} -// -//#[test] -//fn unix_file_scheme_rel_path() { -// let test_url = "file://../user/project-name.git"; -// let normalized = 
normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -//} -// -//#[test] -//fn unix_file_no_scheme_rel_path() { -// let test_url = "../user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -//} -// -//#[should_panic] -//#[test] -//fn win_file_scheme_abs_path() { -// let test_url = "file://c:\\user\\project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// // I actually don't know how this should be normalized. -// assert_eq!(normalized.as_str(), "file://c:\\user\\project-name.git"); -//} -// -//#[should_panic] -//#[test] -//fn win_file_no_scheme_abs_path() { -// let test_url = "c:\\user\\project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// // I actually don't know how this should be normalized. -// assert_eq!(normalized.as_str(), "file://c:\\user\\project-name.git"); -//} -// -//#[test] -//fn win_file_scheme_rel_path() { -// let test_url = "file://..\\user\\project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// // I actually don't know how this should be normalized. -// assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -//} -// -//#[test] -//fn win_file_no_scheme_rel_path() { -// let test_url = "..\\user\\project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// // I actually don't know how this should be normalized. 
-// assert_eq!(normalized.as_str(), "file://../user/project-name.git"); -//} -//#[test] -//fn multi_git_ssh() { -// let test_url = "git+ssh://host.tld/user/project-name.git"; -// let normalized = normalize_url(test_url).expect("Normalizing url failed"); -// -// assert_eq!( -// normalized.as_str(), -// "git+ssh://host.tld/user/project-name.git" -// ); -//} -// -//// From https://github.com/tjtelan/git-url-parse-rs/issues/16 -//#[test] -//fn null_in_input1() { -// let test_url = "////////ws///////////*,\u{0}\u{0}^\u{0}\u{0}\u{0}\u{0}@2\u{1}\u{0}\u{1d})\u{0}\u{0}\u{0}:\u{0}\u{0}\u{0}"; -// let normalized = normalize_url(test_url); -// -// assert!(normalized.is_err()); -//} -// -//// From https://github.com/tjtelan/git-url-parse-rs/issues/16 -//#[test] -//fn null_in_input2() { -// let test_url = "?\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1f}s\u{3}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{5}\u{1}@\u{0}\u{0}\u{4}!e\u{0}\u{0}2\u{1c}^3106://? = parsed.provider_info(); + assert!(provider_info.is_err()) +} \ No newline at end of file From 7778674a478c6e3524a83505d5dd01eb4bec53aa Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 29 Aug 2025 12:58:40 -0700 Subject: [PATCH 12/32] fmt and some clippy --- src/lib.rs | 4 ++-- src/types/mod.rs | 25 ++++++++++++------------- src/types/provider/mod.rs | 18 ++++-------------- tests/provider.rs | 7 +++---- 4 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 89d12e6..cab950a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ mod types; pub use types::{ - GenericProvider, GitProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, - Scheme, AzureDevOpsProvider, GitLabProvider, + AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider, GitUrl, GitUrlBuilder, + GitUrlBuilderError, GitUrlParseError, Scheme, }; #[cfg(feature = "tracing")] diff --git a/src/types/mod.rs b/src/types/mod.rs index 30c8523..bd8e429 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -2,7 +2,7 @@ mod error; mod provider; pub use error::GitUrlParseError; -pub use provider::{GenericProvider, GitProvider, AzureDevOpsProvider, GitLabProvider}; +pub use provider::{AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider}; use derive_builder::Builder; use getset::{Getters, Setters}; @@ -42,7 +42,6 @@ pub enum Scheme { Ssh, ///// Represents No url scheme //Unspecified, - /// Other(String), // todo: need test for this } @@ -196,7 +195,6 @@ impl> GitUrlBuilder

{ giturl.parse_path(&mut working_url, &mut hint); - println!(""); Ok(giturl) } @@ -281,7 +279,7 @@ impl> GitUrlBuilder

{ *working_url = save; } - fn parse_ssh_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + fn parse_ssh_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { let mut builder = self.clone(); if let Ok((_leftover, Some(path))) = GitUrlBuilder::

::_parse_ssh_path(working_url) { @@ -292,7 +290,7 @@ impl> GitUrlBuilder

{ } } - fn parse_path(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { + fn parse_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { let mut builder = self.clone(); if let Ok((leftover, path)) = GitUrlBuilder::

::_parse_path(working_url) { println!("leftover {leftover}, path: {path}"); @@ -309,6 +307,8 @@ impl> GitUrlBuilder

{ fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { opt(terminated( alt(( + + // Fancy: Can I build an iter map on this? tag(Scheme::File.to_string().as_bytes()), tag(Scheme::Ftps.to_string().as_bytes()), tag(Scheme::Ftp.to_string().as_bytes()), @@ -337,7 +337,7 @@ impl> GitUrlBuilder

{ } fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(tag(":"), take_while(|c: char| c.is_digit(10)))).parse(input) + opt(preceded(tag(":"), take_while(|c: char| c.is_ascii_digit()))).parse(input) } // This is making an assumption that the path is relative, not absolute @@ -355,9 +355,9 @@ impl> GitUrlBuilder

{ impl fmt::Display for GitUrl { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let scheme = if let Some(scheme) = &self.scheme() - && self.print_scheme().clone() + && *self.print_scheme() { - format!("{}://", scheme) + format!("{scheme}://") } else { String::new() }; @@ -385,7 +385,7 @@ impl fmt::Display for GitUrl { }; let port = match &self.port() { - Some(p) => format!(":{}", p), + Some(p) => format!(":{p}", ), None => String::new(), }; @@ -405,7 +405,7 @@ impl fmt::Display for GitUrl { let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); - write!(f, "{}", git_url_str) + write!(f, "{git_url_str}", ) } } @@ -438,7 +438,6 @@ impl GitUrl { where T: GitProvider, { - T::from_git_url(&self) - //Err(GitUrlParseError::UnexpectedFormat) + T::from_git_url(self) } -} \ No newline at end of file +} diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index fe6e5b3..0ec3260 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -1,13 +1,5 @@ -// generic -// gitlab (subgroups) style -// azure devops - -use nom::branch::alt; -use nom::bytes::complete::{is_not, tag, take_till, take_until, take_while}; -use nom::character::complete::{alphanumeric1, anychar, one_of}; -use nom::combinator::recognize; -use nom::multi::many0; -use nom::sequence::{preceded, separated_pair, terminated}; +use nom::bytes::complete::{is_not, tag}; +use nom::sequence::{separated_pair}; use nom::{IResult, Parser, combinator::opt, combinator::rest}; use derive_builder::Builder; @@ -68,7 +60,6 @@ impl AzureDevOpsProvider { let (n, _) = opt(tag("/")).parse(input)?; opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) } - } impl GitProvider for AzureDevOpsProvider { @@ -102,12 +93,11 @@ impl GitLabProvider { let (n, _) = opt(tag("/")).parse(input)?; opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) } - } impl GitProvider for GitLabProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((user, repo)))), Some(host)) = + if 
let (Ok((_, Some((_user, repo)))), Some(host)) = (GitLabProvider::_get_user_repo(url.path()), url.host()) { Ok(GitLabProvider { @@ -121,4 +111,4 @@ impl GitProvider for GitLabProvider { Err(GitUrlParseError::UnexpectedFormat) } } -} \ No newline at end of file +} diff --git a/tests/provider.rs b/tests/provider.rs index dfc0127..7320835 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -7,7 +7,6 @@ use git_url_parse::*; // Codeberg // https://codeberg.org/explore/repos - #[test] fn http_generic_git() { let test_url = "https://github.com/tjtelan/git-url-parse-rs.git"; @@ -170,9 +169,9 @@ fn ssh_gitlab_subgroups() { fn filepath() { let test_url = "file:///home/user/Documents/"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - + assert!(parsed.provider().is_none()); - let provider_info: Result = parsed.provider_info(); + let provider_info: Result = parsed.provider_info(); assert!(provider_info.is_err()) -} \ No newline at end of file +} From 75054f9599d51a4829404912575f22a2f3108faf Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Thu, 4 Sep 2025 11:51:42 -0700 Subject: [PATCH 13/32] Checkpoint Al the parsing works, but now trying to save the results --- Cargo.toml | 5 +- examples/nom.rs | 305 ++++++++++++++++++++++++++++++++++++-- src/lib.rs | 7 +- src/types/mod.rs | 73 +++++---- src/types/provider/mod.rs | 27 ++-- tests/parse.rs | 12 +- tests/provider.rs | 24 +-- 7 files changed, 369 insertions(+), 84 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 18f6289..c5c944d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,8 +13,11 @@ version = "0.4.6" rust-version = "1.82" [features] -default = [] +default = ["tracing"] # Do not keep tracing default tracing = ["dep:tracing"] +#filepath +#validate +#provider [dependencies] tracing = { version = "0.1", optional = true } diff --git a/examples/nom.rs b/examples/nom.rs index f4e505a..a655535 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -1,8 +1,285 @@ +use std::borrow::Cow; +use getset::{Getters, Setters}; use git_url_parse::{GitUrl, GitUrlParseError}; -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::multi::many0; -use nom::{IResult, Parser}; +use nom::bits::complete::take; +use nom::bytes::complete::{is_a, take_while}; +use nom::character::complete::{digit1, one_of}; +use nom::combinator::{opt, peek}; +use nom::multi::{many0, many1}; +use nom::sequence::preceded; +use nom::{ + IResult, Parser, + branch::alt, + bytes::complete::tag, + character::complete::{alpha1, alphanumeric1}, + combinator::{consumed, recognize}, + multi::many0_count, + sequence::{pair, separated_pair}, +}; + +#[derive(Debug, Getters, Setters, Default)] +struct GitUrl2 { + url: String, +} + +impl GitUrl2 { + pub fn new(url: &str) -> Self { + GitUrl2 { + url: String::from(url), + } + } + + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + pub fn parse<'a>(input: &'a str) -> IResult<&'a str, &'a str> { + let (input, scheme) = Self::parse_scheme(input)?; + + // Eat the ':' when we have a scheme + let (input, scheme) = if 
scheme.is_some() { + let (input, _) = tag(":")(input)?; + //self.scheme = Cow::Borrowed(&scheme); + (input, scheme) + } else { + (input, None) + }; + + println!("scheme: {scheme:?}"); + + let (input, heir_part) = Self::parse_hier_part(scheme.is_some(), input)?; + println!("heir_part: {heir_part:?}"); + + Ok((input, "")) + } + + pub fn parse_scheme<'a>(input: &'a str) -> IResult<&'a str, Option<&'a str>> { + let mut check = peek(pair( + pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + ), + tag::<&str, &str, nom::error::Error<&str>>("://"), + )); + + if check.parse(input).is_err() { + return Ok((input, None)); + } + + // Must start with alpha character, then alpha/digit/+/-/. + let (input, scheme) = opt(recognize(pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '+' || c == '-' || c == '.' + }), + ))) + .parse(input)?; + + Ok((input, scheme)) + } + + pub fn parse_hier_part<'a>( + scheme: bool, + input: &'a str, + ) -> IResult<&'a str, Option<&'a str>> { + let input = if scheme { + let (input, _) = tag("//")(input)?; + input + } else { + input + }; + + let (input, authority) = Self::parse_authority(input)?; + println!("authority: {authority:?}"); + //let (input, part) = self.path_abempty(input); + let (input, part) = alt(( + Self::path_abempty_parser(), + Self::path_rootless_parser(), + Self::path_ssh_parser(), + )) + .parse(input)?; + //alt((self.path_ssh_parser(), self.path_abempty_parser())).parse(input)?; + + // / path-absolute + // / path-rootless + // / path-empty + + Ok((input, Some(part))) + } + + pub fn parse_authority<'a>(input: &'a str) -> IResult<&'a str, Option<&'a str>> { + let original = input; + + // Optional: username + let (input, username) = Self::parse_userinfo(input)?; + + if let Some(userinfo) = username { + if userinfo.contains(":") { + let (_, (user, token)) = separated_pair( + take_while(|c: char| 
unreserved_uri_chars(c) || subdelims_uri_chars(c)), + tag(":"), + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + ) + .parse(userinfo)?; + println!("user: {user:?}"); + println!("token: {token:?}"); + } else { + println!("user: {userinfo:?}"); + } + } + + // Host + let (input, authority) = + opt(recognize(take_while(|c: char| reg_name_uri_chars(c)))).parse(input)?; + + // Verify if found host is more than symbols + if let Some(host) = authority { + let is_alphanum = host.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); + if !is_alphanum { + return Ok((original, None)); + } + } + + // Optional: port + let (input, port) = Self::parse_port(input)?; + if let Some(port) = port { + println!("port: {port:?}"); + } + + Ok((input, authority)) + } + + pub fn parse_userinfo<'a>(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { + // Peek for username@ + let mut check = peek(pair( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), + tag::<&str, &str, nom::error::Error<&str>>("@"), + )); + + if check.parse(authority_input).is_err() { + return Ok((authority_input, None)); + } + + // Username + let (authority_input, userinfo) = opt(recognize(take_while(|c: char| { + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' + }))) + .parse(authority_input)?; + + let (authority_input, _) = if userinfo.is_some() { + tag("@")(authority_input)? + } else { + // No change to input, but let the compiler be happy + (authority_input, authority_input) + }; + + // Should I parse token in here? + + Ok((authority_input, userinfo)) + } + + pub fn parse_port<'a>(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { + opt(preceded(tag(":"), digit1)).parse(authority_input) + } + + // This will get absolute paths. 
+ // todo: test for empty and start with "//" + pub fn path_abempty_parser<'a>( + ) -> impl Parser< + &'a str, + Output = > as Parser< + &'a str, + >>::Output, + Error = nom::error::Error<&'a str>, + > { + // Starts with '/' or empty + recognize(many1(pair( + tag("/"), + take_while(|c: char| pchar_uri_chars(c)), + ))) + } + + pub fn path_ssh_parser<'a>( + ) -> impl Parser< + &'a str, + Output = > as Parser< + &'a str, + >>::Output, + Error = nom::error::Error<&'a str>, + > { + recognize(( + tag(":"), + take_while(|c: char| pchar_uri_chars(c)), + many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )) + } + + //pub fn path_absolute_parser<'a>( + // &self, + //) -> impl Parser< + // &str, + // Output = > as Parser< + // &str, + // >>::Output, + // Error = nom::error::Error<&str>, + //> { + // // Starts with '/' but not "//" + // recognize(many1(pair( + // tag("/"), + // take_while(|c: char| pchar_uri_chars(c)), + // ))) + //} + + pub fn path_rootless_parser<'a>( + ) -> impl Parser< + &'a str, + Output = > as Parser< + &'a str, + >>::Output, + Error = nom::error::Error<&'a str>, + > { + recognize(pair( + take_while(|c: char| pchar_uri_chars(c)), + many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )) + } +} + +fn pchar_uri_chars(c: char) -> bool { + // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' +} + +fn reg_name_uri_chars(c: char) -> bool { + // *( unreserved / pct-encoded / sub-delims ) + unreserved_uri_chars(c) || subdelims_uri_chars(c) +} +fn unreserved_uri_chars(c: char) -> bool { + is_alphanum(c) || c == '-' || c == '.' || c == '_' || c == '~' +} + +fn is_alphanum(c: char) -> bool { + c.is_ascii_alphabetic() || c.is_ascii_digit() +} + +fn subdelims_uri_chars(c: char) -> bool { + c == '!' 
+ || c == '$' + || c == '&' + || c == '\'' + || c == '(' + || c == ')' + || c == '*' + || c == '+' + || c == ',' + || c == ';' + || c == '=' + || c == '\\' // This is not part of spec, but used for windows paths +} fn main() -> Result<(), GitUrlParseError> { env_logger::init(); @@ -21,6 +298,7 @@ fn main() -> Result<(), GitUrlParseError> { "~/path/to/repo.git/", "./path/to/repo.git/", "./path/to/repo.git", + "/path/to/repo.git", "../test_repo", "..\\test_repo", "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", @@ -28,13 +306,18 @@ fn main() -> Result<(), GitUrlParseError> { ]; for test_url in test_vec { - let parsed = GitUrl::parse(test_url).unwrap(); - //println!("leftover:{leftover:#?}, output:{output:#?}"); - //let parsed = GitUrl::parse(test_url)?; - //println!("Original: {}", test_url); - println!("Parsed: {}", parsed); - println!("Parsed: {:#?}", parsed); - //println!("{:?}\n", parsed); + //let parsed = GitUrl::parse(test_url).unwrap(); + ////println!("leftover:{leftover:#?}, output:{output:#?}"); + ////let parsed = GitUrl::parse(test_url)?; + ////println!("Original: {}", test_url); + //println!("Parsed: {}", parsed); + //println!("Parsed: {:#?}", parsed); + ////println!("{:?}\n", parsed); + + let parsed = GitUrl2::parse(test_url).unwrap(); + println!("{parsed:?}"); + //println!("{:?}", parsed.parse()); + println!(""); } Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index cab950a..02d7e9b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,8 @@ -mod types; +pub mod types; + +// Re-exports pub use types::{ - AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider, GitUrl, GitUrlBuilder, - GitUrlBuilderError, GitUrlParseError, Scheme, + GenericProvider, GitProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, }; #[cfg(feature = "tracing")] diff --git a/src/types/mod.rs b/src/types/mod.rs index bd8e429..e2e6af9 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -4,25 +4,27 @@ mod provider; pub use 
error::GitUrlParseError; pub use provider::{AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider}; -use derive_builder::Builder; -use getset::{Getters, Setters}; -use strum::{Display, EnumString, VariantNames}; - use core::str; use std::fmt; use std::str::FromStr; -//use url::Url; +use derive_builder::Builder; +use getset::{Getters, Setters}; use nom::branch::alt; use nom::bytes::complete::{tag, take_till, take_until, take_while}; use nom::character::complete::one_of; use nom::sequence::{preceded, terminated}; use nom::{IResult, Parser, combinator::opt, combinator::rest}; +use strum::{Display, EnumString, VariantNames}; +#[cfg(feature = "tracing")] +use tracing::debug; +use typed_path::{Utf8TypedPath, Utf8TypedPathBuf}; +// todo: let's get rid of this /// Supported uri schemes for parsing #[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] #[strum(serialize_all = "kebab_case")] -pub enum Scheme { +pub(crate) enum Scheme { /// Represents `file://` url scheme File, /// Represents `ftp://` url scheme @@ -52,55 +54,43 @@ pub(crate) enum GitUrlParseHint { Sshlike, Filelike, Httplike, - //Custom // needed? } /// GitUrl represents an input url that is a url used by git /// Internally during parsing the url is sanitized and uses the `url` crate to perform /// the majority of the parsing effort, and with some extra handling to expose /// metadata used my many git hosting services -#[derive(Debug, PartialEq, Eq, Clone, Builder, Default, Getters, Setters)] +#[derive(Debug, PartialEq, Eq, Clone, Builder, Getters, Setters)] #[builder(build_fn(validate = "Self::prebuild_check"), field(public))] #[get = "pub"] pub struct GitUrl

where P: GitProvider, { - /// The fully qualified domain name (FQDN) or IP of the repo + /// The host, domain or IP of the repo #[builder(setter(into, strip_option), default)] host: Option, - ///// The name of the repo - //pub name: String, - ///// The owner/account/project name - //pub owner: Option, - ///// The organization name. Supported by Azure DevOps - //pub organization: Option, - ///// The full name of the repo, formatted as "owner/name" - //pub fullname: String, - ///// The git url scheme + /// The url scheme #[builder(setter(into, strip_option), default)] scheme: Option, - /// The authentication user + /// Authentication user #[builder(setter(into, strip_option), default)] #[getset(set = "pub(crate)")] user: Option, - /// The oauth token (could appear in the https urls) + /// Authentication token (could appear in the https urls) #[builder(setter(into, strip_option), default)] #[getset(set = "pub(crate)")] token: Option, - /// The non-conventional port where git service is hosted + /// The port where git service is hosted #[builder(setter(into, strip_option), default)] port: Option, /// The path to repo w/ respect to user + hostname #[builder(setter(into))] - path: String, - ///// Indicate if url uses the .git suffix - //pub git_suffix: bool, - ///// Indicate if url explicitly uses its scheme - //pub scheme_prefix: bool, + path: Utf8TypedPathBuf, + /// Include scheme:// when printing url #[builder(default)] print_scheme: bool, - + /// Hosted git provider info derived from GitUrl #[builder(setter(into, strip_option), default)] provider: Option

, } @@ -112,8 +102,8 @@ impl> GitUrlBuilder

{ } fn prebuild_check(&self) -> Result<(), String> { - #[cfg(feature = "tracing")] - debug!("Processing: {:?}", &url); + //#[cfg(feature = "tracing")] + //debug!("Processing: {:?}", &url); // Error if there are null bytes within the url @@ -155,10 +145,10 @@ impl> GitUrlBuilder

{ } if let Some(path) = &self.path { - if path.contains('\0') { + if path.as_str().contains('\0') { return Err(GitUrlParseError::FoundNullBytes.to_string()); } - if path.is_empty() { + if path.as_str().is_empty() { return Err( GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() ); @@ -169,7 +159,7 @@ impl> GitUrlBuilder

{ } fn parse(url: &str) -> Result { - println!("start: {url}"); + debug!("{url}"); let mut giturl = GitUrlBuilder::default(); let mut working_url = url; let mut hint = GitUrlParseHint::default(); @@ -181,9 +171,14 @@ impl> GitUrlBuilder

{ giturl.parse_host_port(&mut working_url, &mut hint); match hint { - GitUrlParseHint::Httplike => {} + GitUrlParseHint::Httplike => { + if working_url.starts_with(":") && giturl.port.is_none() { + return Err(GitUrlParseError::UnexpectedFormat); + } else { + println!("Nothing wrong here: {working_url}"); + } + } GitUrlParseHint::Sshlike => { - //working_url = giturl.parse_ssh_path(&working_url); giturl.parse_ssh_path(&mut working_url, &mut hint); } GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { @@ -295,7 +290,8 @@ impl> GitUrlBuilder

{ if let Ok((leftover, path)) = GitUrlBuilder::

::_parse_path(working_url) { println!("leftover {leftover}, path: {path}"); - builder.path(path); + let parsed_path = Utf8TypedPath::derive(path).to_path_buf(); + builder.path(parsed_path); *self = builder; *working_url = leftover; @@ -307,7 +303,6 @@ impl> GitUrlBuilder

{ fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { opt(terminated( alt(( - // Fancy: Can I build an iter map on this? tag(Scheme::File.to_string().as_bytes()), tag(Scheme::Ftps.to_string().as_bytes()), @@ -385,13 +380,13 @@ impl fmt::Display for GitUrl { }; let port = match &self.port() { - Some(p) => format!(":{p}", ), + Some(p) => format!(":{p}",), None => String::new(), }; let path = if self.scheme().clone() == Some(Scheme::Ssh) { if self.port().is_some() { - if !self.path().starts_with('/') { + if !self.path().as_str().starts_with('/') { format!("/{}", &self.path()) } else { self.path().to_string() @@ -405,7 +400,7 @@ impl fmt::Display for GitUrl { let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); - write!(f, "{git_url_str}", ) + write!(f, "{git_url_str}",) } } diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 0ec3260..56618ef 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -1,5 +1,5 @@ use nom::bytes::complete::{is_not, tag}; -use nom::sequence::{separated_pair}; +use nom::sequence::separated_pair; use nom::{IResult, Parser, combinator::opt, combinator::rest}; use derive_builder::Builder; @@ -20,8 +20,8 @@ pub struct GenericProvider { } impl GenericProvider { fn _get_owner_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { - let (n, _) = opt(tag("/")).parse(input)?; - opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) + let (input, _) = opt(tag("/")).parse(input)?; + opt(separated_pair(is_not("/"), tag("/"), rest)).parse(input) } // todo @@ -32,9 +32,10 @@ impl GenericProvider { impl GitProvider for GenericProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((user, repo)))), Some(host)) = - (GenericProvider::_get_owner_repo(url.path()), url.host()) - { + if let (Ok((_, Some((user, repo)))), Some(host)) = ( + GenericProvider::_get_owner_repo(url.path().as_str()), + url.host(), + ) { Ok(GenericProvider { host: host.clone(), owner: 
String::from(user), @@ -64,9 +65,10 @@ impl AzureDevOpsProvider { impl GitProvider for AzureDevOpsProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((user, repo)))), Some(host)) = - (AzureDevOpsProvider::_get_user_repo(url.path()), url.host()) - { + if let (Ok((_, Some((user, repo)))), Some(host)) = ( + AzureDevOpsProvider::_get_user_repo(url.path().as_str()), + url.host(), + ) { Ok(AzureDevOpsProvider { host: host.clone(), org: String::from(""), @@ -97,9 +99,10 @@ impl GitLabProvider { impl GitProvider for GitLabProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((_user, repo)))), Some(host)) = - (GitLabProvider::_get_user_repo(url.path()), url.host()) - { + if let (Ok((_, Some((_user, repo)))), Some(host)) = ( + GitLabProvider::_get_user_repo(url.path().as_str()), + url.host(), + ) { Ok(GitLabProvider { host: host.clone(), user: String::from(""), diff --git a/tests/parse.rs b/tests/parse.rs index 0eddb17..ef21762 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -4,7 +4,7 @@ fn ssh_user_ports() { let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() - .scheme(Scheme::Ssh) + .scheme("ssh".into()) .host("host.tld") .user("git") .port(9999 as u16) @@ -209,7 +209,7 @@ fn absolute_unix_path() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() .scheme(Scheme::File) - .path("/path/to/project-name.git") + .path(test_url) .build() .unwrap(); @@ -219,11 +219,11 @@ fn absolute_unix_path() { // Issue #6 - Relative Windows paths will parse into Unix paths #[test] fn relative_windows_path() { - let test_url = "..\\project-name.git"; + let test_url = r"..\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() .scheme(Scheme::File) - .path("../project-name.git") + .path(test_url) .build() 
.unwrap(); @@ -235,11 +235,11 @@ fn relative_windows_path() { #[should_panic(expected = "URL parse failed: UnexpectedFormat")] #[test] fn absolute_windows_path() { - let test_url = "c:\\project-name.git"; + let test_url = r"c:\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let expected = GitUrlBuilder::default() .scheme(Scheme::File) - .path("c:\\project-name.git") + .path(test_url) .build() .unwrap(); diff --git a/tests/provider.rs b/tests/provider.rs index 7320835..2e45c68 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -76,8 +76,8 @@ fn http_azure_devops() { let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); - let expected = AzureDevOpsProvider { + let provider_info: types::AzureDevOpsProvider = parsed.provider_info().unwrap(); + let expected = types::AzureDevOpsProvider { host: "dev.azure.com".to_string(), org: "CompanyName".to_string(), project: "ProjectName".to_string(), @@ -91,8 +91,8 @@ fn ssh_azure_devops() { let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); - let expected = AzureDevOpsProvider { + let provider_info: types::AzureDevOpsProvider = parsed.provider_info().unwrap(); + let expected = types::AzureDevOpsProvider { host: "ssh.dev.azure.com".to_string(), org: "CompanyName".to_string(), project: "ProjectName".to_string(), @@ -110,8 +110,8 @@ fn http_gitlab() { let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { + let provider_info: types::GitLabProvider = 
parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { host: "gitlab.com".to_string(), user: "gitlab-org".to_string(), subgroup: None, @@ -125,8 +125,8 @@ fn ssh_gitlab() { let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { host: "gitlab.com".to_string(), user: "gitlab-org".to_string(), subgroup: None, @@ -140,8 +140,8 @@ fn http_gitlab_subgroups() { let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { host: "gitlab.com".to_string(), user: "gitlab-org".to_string(), subgroup: Some(vec!["sbom".to_string(), "systems".to_string()]), @@ -155,8 +155,8 @@ fn ssh_gitlab_subgroups() { let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { host: "gitlab.com".to_string(), user: "gitlab-org".to_string(), subgroup: Some(vec!["sbom".to_string(), "systems".to_string()]), From 30f4eb1cbae08ac731cf56aa3e4a307795add6d0 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Thu, 4 Sep 2025 16:16:00 -0700 Subject: [PATCH 14/32] Saving slices works --- examples/nom.rs | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/examples/nom.rs b/examples/nom.rs index a655535..50166c4 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -18,22 +18,32 @@ use nom::{ }; #[derive(Debug, Getters, Setters, Default)] -struct GitUrl2 { +struct GitUrl2<'a> { url: String, + scheme: Option<&'a str>, } -impl GitUrl2 { +impl<'a> GitUrl2<'a> { pub fn new(url: &str) -> Self { GitUrl2 { url: String::from(url), + ..Default::default() } } // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A - pub fn parse<'a>(input: &'a str) -> IResult<&'a str, &'a str> { + pub fn parse(input: &'a str) -> IResult<&'a str, Self> { + let original = input; let (input, scheme) = Self::parse_scheme(input)?; + let scheme_slice = if let Some(scheme) = scheme { + if let Some(index) = original.find(scheme) { + //println!("scheme slice: {}", &original[index..(index+scheme.len())]); + Some(&original[index..(index+scheme.len())]) + } else { None } + } else { None }; + // Eat the ':' when we have a scheme let (input, scheme) = if scheme.is_some() { let (input, _) = tag(":")(input)?; @@ -48,10 +58,10 @@ impl GitUrl2 { let (input, heir_part) = Self::parse_hier_part(scheme.is_some(), input)?; println!("heir_part: {heir_part:?}"); - Ok((input, "")) + Ok((input, GitUrl2{ url: original.to_string(), scheme: scheme_slice})) } - pub fn parse_scheme<'a>(input: &'a str) -> IResult<&'a str, Option<&'a str>> { + pub fn parse_scheme(input: &'a str) -> IResult<&'a str, Option<&'a str>> { let mut check = peek(pair( pair( alpha1, @@ -82,7 +92,7 @@ impl GitUrl2 { Ok((input, scheme)) } - pub fn parse_hier_part<'a>( + pub fn parse_hier_part( scheme: bool, input: &'a str, ) -> IResult<&'a str, Option<&'a str>> { @@ -111,7 +121,7 @@ impl GitUrl2 { Ok((input, Some(part))) } - pub fn parse_authority<'a>(input: &'a str) -> IResult<&'a str, 
Option<&'a str>> { + pub fn parse_authority(input: &'a str) -> IResult<&'a str, Option<&'a str>> { let original = input; // Optional: username @@ -153,7 +163,7 @@ impl GitUrl2 { Ok((input, authority)) } - pub fn parse_userinfo<'a>(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { + pub fn parse_userinfo(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { // Peek for username@ let mut check = peek(pair( take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), @@ -182,13 +192,13 @@ impl GitUrl2 { Ok((authority_input, userinfo)) } - pub fn parse_port<'a>(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { + pub fn parse_port(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { opt(preceded(tag(":"), digit1)).parse(authority_input) } // This will get absolute paths. // todo: test for empty and start with "//" - pub fn path_abempty_parser<'a>( + pub fn path_abempty_parser( ) -> impl Parser< &'a str, Output = > as Parser< @@ -203,7 +213,7 @@ impl GitUrl2 { ))) } - pub fn path_ssh_parser<'a>( + pub fn path_ssh_parser( ) -> impl Parser< &'a str, Output = > as Parser< @@ -234,7 +244,7 @@ impl GitUrl2 { // ))) //} - pub fn path_rootless_parser<'a>( + pub fn path_rootless_parser( ) -> impl Parser< &'a str, Output = > as Parser< From b19abf262096b2c2622e46d8231bd4257a844c25 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Thu, 4 Sep 2025 16:43:13 -0700 Subject: [PATCH 15/32] Move where char is parsed The colon for scheme is now in the scheme parser --- examples/nom.rs | 97 ++++++++++++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/examples/nom.rs b/examples/nom.rs index 50166c4..1342fd0 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -1,12 +1,13 @@ -use std::borrow::Cow; use getset::{Getters, Setters}; use git_url_parse::{GitUrl, GitUrlParseError}; +use nom::FindSubstring; use nom::bits::complete::take; use nom::bytes::complete::{is_a, take_while}; use nom::character::complete::{digit1, one_of}; use nom::combinator::{opt, peek}; +use nom::error::context; use nom::multi::{many0, many1}; -use nom::sequence::preceded; +use nom::sequence::{preceded, terminated}; use nom::{ IResult, Parser, branch::alt, @@ -16,6 +17,7 @@ use nom::{ multi::many0_count, sequence::{pair, separated_pair}, }; +use std::borrow::Cow; #[derive(Debug, Getters, Setters, Default)] struct GitUrl2<'a> { @@ -35,30 +37,40 @@ impl<'a> GitUrl2<'a> { pub fn parse(input: &'a str) -> IResult<&'a str, Self> { let original = input; - let (input, scheme) = Self::parse_scheme(input)?; + let (input, scheme) = Self::parse_scheme.parse(input)?; let scheme_slice = if let Some(scheme) = scheme { - if let Some(index) = original.find(scheme) { + if let Some(index) = original.find_substring(scheme) { //println!("scheme slice: {}", &original[index..(index+scheme.len())]); - Some(&original[index..(index+scheme.len())]) - } else { None } - } else { None }; - - // Eat the ':' when we have a scheme - let (input, scheme) = if scheme.is_some() { - let (input, _) = tag(":")(input)?; - //self.scheme = Cow::Borrowed(&scheme); - (input, scheme) + Some(&original[index..(index + scheme.len())]) + } else { + None + } } else { - (input, None) + None }; + // Eat the ':' when we have a scheme + //let (input, scheme) = if scheme.is_some() { + // let (input, _) = tag(":")(input)?; + 
// //self.scheme = Cow::Borrowed(&scheme); + // (input, scheme) + //} else { + // (input, None) + //}; + println!("scheme: {scheme:?}"); let (input, heir_part) = Self::parse_hier_part(scheme.is_some(), input)?; println!("heir_part: {heir_part:?}"); - Ok((input, GitUrl2{ url: original.to_string(), scheme: scheme_slice})) + Ok(( + input, + GitUrl2 { + url: original.to_string(), + scheme: scheme_slice, + }, + )) } pub fn parse_scheme(input: &'a str) -> IResult<&'a str, Option<&'a str>> { @@ -81,33 +93,42 @@ impl<'a> GitUrl2<'a> { } // Must start with alpha character, then alpha/digit/+/-/. - let (input, scheme) = opt(recognize(pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() || c.is_ascii_digit() || c == '+' || c == '-' || c == '.' - }), - ))) - .parse(input)?; - - Ok((input, scheme)) + //let (input, scheme) = opt(recognize(pair( + context( + "Scheme parse", + opt(terminated( + recognize(pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' 
+ }), + )), + tag(":"), + )), + ) + //.parse(input)?; + .parse(input) + + //Ok((input, scheme)) } - pub fn parse_hier_part( - scheme: bool, - input: &'a str, - ) -> IResult<&'a str, Option<&'a str>> { - let input = if scheme { - let (input, _) = tag("//")(input)?; - input - } else { - input - }; + pub fn parse_hier_part(scheme: bool, input: &'a str) -> IResult<&'a str, Option<&'a str>> { + //let input = if scheme { + // let (input, _) = tag("//")(input)?; + // input + //} else { + // input + //}; let (input, authority) = Self::parse_authority(input)?; println!("authority: {authority:?}"); //let (input, part) = self.path_abempty(input); let (input, part) = alt(( - Self::path_abempty_parser(), + preceded(tag("//"), Self::path_abempty_parser()), Self::path_rootless_parser(), Self::path_ssh_parser(), )) @@ -205,7 +226,7 @@ impl<'a> GitUrl2<'a> { &'a str, >>::Output, Error = nom::error::Error<&'a str>, - > { + >{ // Starts with '/' or empty recognize(many1(pair( tag("/"), @@ -220,7 +241,7 @@ impl<'a> GitUrl2<'a> { &'a str, >>::Output, Error = nom::error::Error<&'a str>, - > { + >{ recognize(( tag(":"), take_while(|c: char| pchar_uri_chars(c)), @@ -251,7 +272,7 @@ impl<'a> GitUrl2<'a> { &'a str, >>::Output, Error = nom::error::Error<&'a str>, - > { + >{ recognize(pair( take_while(|c: char| pchar_uri_chars(c)), many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), From c4e898e0331bc299810d83bb7468c1005ac49b2b Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Thu, 4 Sep 2025 17:52:50 -0700 Subject: [PATCH 16/32] All fields slices into struct --- examples/nom.rs | 149 ++++++++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 56 deletions(-) diff --git a/examples/nom.rs b/examples/nom.rs index 1342fd0..b8795d3 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -18,11 +18,17 @@ use nom::{ sequence::{pair, separated_pair}, }; use std::borrow::Cow; +use std::path; #[derive(Debug, Getters, Setters, Default)] struct GitUrl2<'a> { url: String, scheme: Option<&'a str>, + user: Option<&'a str>, + token: Option<&'a str>, + host: Option<&'a str>, + port: Option<&'a str>, + path: Option<&'a str>, } impl<'a> GitUrl2<'a> { @@ -39,16 +45,7 @@ impl<'a> GitUrl2<'a> { let original = input; let (input, scheme) = Self::parse_scheme.parse(input)?; - let scheme_slice = if let Some(scheme) = scheme { - if let Some(index) = original.find_substring(scheme) { - //println!("scheme slice: {}", &original[index..(index+scheme.len())]); - Some(&original[index..(index + scheme.len())]) - } else { - None - } - } else { - None - }; + let scheme_slice = as_slice_bounds(original, scheme); // Eat the ':' when we have a scheme //let (input, scheme) = if scheme.is_some() { @@ -59,34 +56,53 @@ impl<'a> GitUrl2<'a> { // (input, None) //}; - println!("scheme: {scheme:?}"); + //println!("scheme: {scheme:?}"); + + let (input, heir_part) = Self::parse_hier_part(input)?; - let (input, heir_part) = Self::parse_hier_part(scheme.is_some(), input)?; - println!("heir_part: {heir_part:?}"); + let (user_opt, token_opt) = heir_part.0.0; + let (host_opt) = heir_part.0.1; + let (port_opt) = heir_part.0.2; + let (path_opt) = heir_part.1; + + let user_slice = as_slice_bounds(original, user_opt); + let token_slice = as_slice_bounds(original, token_opt); + let host_slice = as_slice_bounds(original, host_opt); + let port_slice = as_slice_bounds(original, port_opt); + let path_slice = as_slice_bounds(original, path_opt); + 
//println!("heir_part: {heir_part:?}"); Ok(( input, GitUrl2 { url: original.to_string(), scheme: scheme_slice, + user: user_slice, + token: token_slice, + host: host_slice, + port: port_slice, + path: path_slice, }, )) } pub fn parse_scheme(input: &'a str) -> IResult<&'a str, Option<&'a str>> { - let mut check = peek(pair( - pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() - || c.is_ascii_digit() - || c == '+' - || c == '-' - || c == '.' - }), - ), - tag::<&str, &str, nom::error::Error<&str>>("://"), - )); + let mut check = context( + "scheme validate", + peek(pair( + pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + ), + tag::<&str, &str, nom::error::Error<&str>>("://"), + )), + ); if check.parse(input).is_err() { return Ok((input, None)); @@ -107,7 +123,8 @@ impl<'a> GitUrl2<'a> { || c == '.' }), )), - tag(":"), + // We consume the "://" here to allow scheme to be optional + tag("://"), )), ) //.parse(input)?; @@ -116,7 +133,8 @@ impl<'a> GitUrl2<'a> { //Ok((input, scheme)) } - pub fn parse_hier_part(scheme: bool, input: &'a str) -> IResult<&'a str, Option<&'a str>> { + //pub fn parse_hier_part(scheme: bool, input: &'a str) -> IResult<&'a str, Option<&'a str>> { + pub fn parse_hier_part(input: &'a str) -> IResult<&'a str, (((Option<&str>, Option<&str>), Option<&str>, Option<&str>),Option<&'a str>)> { //let input = if scheme { // let (input, _) = tag("//")(input)?; // input @@ -125,7 +143,7 @@ impl<'a> GitUrl2<'a> { //}; let (input, authority) = Self::parse_authority(input)?; - println!("authority: {authority:?}"); + //println!("authority: {authority:?}"); //let (input, part) = self.path_abempty(input); let (input, part) = alt(( preceded(tag("//"), Self::path_abempty_parser()), @@ -139,52 +157,39 @@ impl<'a> GitUrl2<'a> { // / path-rootless // / path-empty - Ok((input, Some(part))) + Ok((input, (authority, Some(part)))) } - pub fn parse_authority(input: &'a 
str) -> IResult<&'a str, Option<&'a str>> { + pub fn parse_authority(input: &'a str) -> IResult<&'a str, ((Option<&str>, Option<&str>), Option<&str>, Option<&str>)> { let original = input; - // Optional: username - let (input, username) = Self::parse_userinfo(input)?; - - if let Some(userinfo) = username { - if userinfo.contains(":") { - let (_, (user, token)) = separated_pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - tag(":"), - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - ) - .parse(userinfo)?; - println!("user: {user:?}"); - println!("token: {token:?}"); - } else { - println!("user: {userinfo:?}"); - } - } + // Optional: username / token + let (input, userinfo) = Self::parse_userinfo(input)?; // Host - let (input, authority) = + let (input, host) = opt(recognize(take_while(|c: char| reg_name_uri_chars(c)))).parse(input)?; // Verify if found host is more than symbols - if let Some(host) = authority { + if let Some(host) = host { let is_alphanum = host.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); if !is_alphanum { - return Ok((original, None)); + return Ok((original, ((None, None), None, None))); } } // Optional: port let (input, port) = Self::parse_port(input)?; if let Some(port) = port { - println!("port: {port:?}"); + //println!("port: {port:?}"); } - Ok((input, authority)) + Ok((input, (userinfo, host, port))) } - pub fn parse_userinfo(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { + pub fn parse_userinfo( + authority_input: &'a str, + ) -> IResult<&'a str, (Option<&'a str>, Option<&'a str>)> { // Peek for username@ let mut check = peek(pair( take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), @@ -192,7 +197,7 @@ impl<'a> GitUrl2<'a> { )); if check.parse(authority_input).is_err() { - return Ok((authority_input, None)); + return Ok((authority_input, (None, None))); } // Username @@ -210,7 +215,26 @@ impl<'a> GitUrl2<'a> { // Should I 
parse token in here? - Ok((authority_input, userinfo)) + let (user, token) = if let Some(userinfo) = userinfo { + if userinfo.contains(":") { + let (_, (user, token)) = separated_pair( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + tag(":"), + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + ) + .parse(userinfo)?; + //println!("user: {user:?}"); + //println!("token: {token:?}"); + (Some(user), Some(token)) + } else { + //println!("user: {userinfo:?}"); + (Some(userinfo), None) + } + } else { + (None, None) + }; + + Ok((authority_input, (user, token))) } pub fn parse_port(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { @@ -280,6 +304,19 @@ impl<'a> GitUrl2<'a> { } } +fn as_slice_bounds<'a>(original: &'a str, field: Option<&'a str>) -> Option<&'a str> { + if let Some(field) = field { + if let Some(index) = original.find_substring(field) { + //println!("scheme slice: {}", &original[index..(index+scheme.len())]); + Some(&original[index..(index + field.len())]) + } else { + None + } + } else { + None + } +} + fn pchar_uri_chars(c: char) -> bool { // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' From 1175a8dcb5d57d0df6641be14f284c0b4f254522 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Thu, 4 Sep 2025 20:42:25 -0700 Subject: [PATCH 17/32] More cleanup before moving into crate --- examples/nom.rs | 252 ++++++++++++++++++++---------------------------- 1 file changed, 107 insertions(+), 145 deletions(-) diff --git a/examples/nom.rs b/examples/nom.rs index b8795d3..51e312e 100644 --- a/examples/nom.rs +++ b/examples/nom.rs @@ -1,10 +1,9 @@ use getset::{Getters, Setters}; use git_url_parse::{GitUrl, GitUrlParseError}; use nom::FindSubstring; -use nom::bits::complete::take; use nom::bytes::complete::{is_a, take_while}; use nom::character::complete::{digit1, one_of}; -use nom::combinator::{opt, peek}; +use nom::combinator::{opt, peek, verify}; use nom::error::context; use nom::multi::{many0, many1}; use nom::sequence::{preceded, terminated}; @@ -12,13 +11,10 @@ use nom::{ IResult, Parser, branch::alt, bytes::complete::tag, - character::complete::{alpha1, alphanumeric1}, - combinator::{consumed, recognize}, - multi::many0_count, + character::complete::alpha1, + combinator::recognize, sequence::{pair, separated_pair}, }; -use std::borrow::Cow; -use std::path; #[derive(Debug, Getters, Setters, Default)] struct GitUrl2<'a> { @@ -32,32 +28,19 @@ struct GitUrl2<'a> { } impl<'a> GitUrl2<'a> { - pub fn new(url: &str) -> Self { - GitUrl2 { - url: String::from(url), - ..Default::default() - } - } - - // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A - + // https://datatracker.ietf.org/doc/html/rfc3986 + // Based on rfc3986, but does not strictly cover the spec + // * No support for: + // * query, fragment, percent-encoding, and much of the edges for path support + // * many forms of ip representations like ipv6, hexdigits + // * Added support for: + // * parsing ssh git urls which use ":" as a delimiter between the authority and path + // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) + // * some limited support for windows/linux filepaths pub fn parse(input: &'a str) -> IResult<&'a str, Self> { let 
original = input; - let (input, scheme) = Self::parse_scheme.parse(input)?; - - let scheme_slice = as_slice_bounds(original, scheme); - - // Eat the ':' when we have a scheme - //let (input, scheme) = if scheme.is_some() { - // let (input, _) = tag(":")(input)?; - // //self.scheme = Cow::Borrowed(&scheme); - // (input, scheme) - //} else { - // (input, None) - //}; - - //println!("scheme: {scheme:?}"); + let (input, scheme) = Self::parse_scheme.parse(input)?; let (input, heir_part) = Self::parse_hier_part(input)?; let (user_opt, token_opt) = heir_part.0.0; @@ -65,23 +48,16 @@ impl<'a> GitUrl2<'a> { let (port_opt) = heir_part.0.2; let (path_opt) = heir_part.1; - let user_slice = as_slice_bounds(original, user_opt); - let token_slice = as_slice_bounds(original, token_opt); - let host_slice = as_slice_bounds(original, host_opt); - let port_slice = as_slice_bounds(original, port_opt); - let path_slice = as_slice_bounds(original, path_opt); - //println!("heir_part: {heir_part:?}"); - Ok(( input, GitUrl2 { url: original.to_string(), - scheme: scheme_slice, - user: user_slice, - token: token_slice, - host: host_slice, - port: port_slice, - path: path_slice, + scheme, + user: user_opt, + token: token_opt, + host: host_opt, + port: port_opt, + path: path_opt, }, )) } @@ -109,7 +85,6 @@ impl<'a> GitUrl2<'a> { } // Must start with alpha character, then alpha/digit/+/-/. - //let (input, scheme) = opt(recognize(pair( context( "Scheme parse", opt(terminated( @@ -123,66 +98,68 @@ impl<'a> GitUrl2<'a> { || c == '.' }), )), - // We consume the "://" here to allow scheme to be optional + // Not part of spec. 
We consume the "://" here to more easily manage scheme to be optional tag("://"), )), ) - //.parse(input)?; .parse(input) - - //Ok((input, scheme)) } - //pub fn parse_hier_part(scheme: bool, input: &'a str) -> IResult<&'a str, Option<&'a str>> { - pub fn parse_hier_part(input: &'a str) -> IResult<&'a str, (((Option<&str>, Option<&str>), Option<&str>, Option<&str>),Option<&'a str>)> { - //let input = if scheme { - // let (input, _) = tag("//")(input)?; - // input - //} else { - // input - //}; - + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 + // The rfc says parsing the "//" part of the uri belongs to the hier-part parsing + // but we only support common internet protocols, file paths, but not other "baseless" ones + // so it is sensible for this move it with scheme parsing to support git user service urls + pub fn parse_hier_part( + input: &'a str, + ) -> IResult< + &'a str, + ( + ((Option<&str>, Option<&str>), Option<&str>, Option<&str>), + Option<&'a str>, + ), + > { let (input, authority) = Self::parse_authority(input)?; //println!("authority: {authority:?}"); - //let (input, part) = self.path_abempty(input); - let (input, part) = alt(( - preceded(tag("//"), Self::path_abempty_parser()), - Self::path_rootless_parser(), - Self::path_ssh_parser(), - )) - .parse(input)?; - //alt((self.path_ssh_parser(), self.path_abempty_parser())).parse(input)?; - // / path-absolute - // / path-rootless - // / path-empty + let (input, part) = context( + "Top of path parsers", + alt(( + //preceded(tag("//"), Self::path_abempty_parser()), + Self::path_abempty_parser(), + Self::path_rootless_parser(), + Self::path_ssh_parser(), + )), + ) + .parse(input)?; Ok((input, (authority, Some(part)))) } - pub fn parse_authority(input: &'a str) -> IResult<&'a str, ((Option<&str>, Option<&str>), Option<&str>, Option<&str>)> { + pub fn parse_authority( + input: &'a str, + ) -> IResult<&'a str, ((Option<&str>, Option<&str>), Option<&str>, Option<&str>)> { let original = input; // 
Optional: username / token let (input, userinfo) = Self::parse_userinfo(input)?; // Host - let (input, host) = - opt(recognize(take_while(|c: char| reg_name_uri_chars(c)))).parse(input)?; - - // Verify if found host is more than symbols - if let Some(host) = host { - let is_alphanum = host.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); - if !is_alphanum { - return Ok((original, ((None, None), None, None))); - } - } + let (input, host) = context( + "Host parser", + opt(verify( + recognize(take_while(|c: char| reg_name_uri_chars(c))), + |s: &str| { + let has_alphanum = s.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); + let starts_with_alphanum = s.chars().next().is_some_and(|c| is_alphanum(c)); + + has_alphanum && starts_with_alphanum + }, + )), + ) + .parse(input)?; // Optional: port let (input, port) = Self::parse_port(input)?; - if let Some(port) = port { - //println!("port: {port:?}"); - } Ok((input, (userinfo, host, port))) } @@ -191,43 +168,48 @@ impl<'a> GitUrl2<'a> { authority_input: &'a str, ) -> IResult<&'a str, (Option<&'a str>, Option<&'a str>)> { // Peek for username@ - let mut check = peek(pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), - tag::<&str, &str, nom::error::Error<&str>>("@"), - )); + let mut check = context( + "Userinfo validation", + peek(pair( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), + tag::<&str, &str, nom::error::Error<&str>>("@"), + )), + ); if check.parse(authority_input).is_err() { return Ok((authority_input, (None, None))); } - // Username - let (authority_input, userinfo) = opt(recognize(take_while(|c: char| { - unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' - }))) + // Userinfo + let (authority_input, userinfo) = context( + "Userinfo parser", + opt(recognize(take_while(|c: char| { + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' + }))), + ) .parse(authority_input)?; let (authority_input, _) = 
if userinfo.is_some() { - tag("@")(authority_input)? + context("Userinfo '@' parser", tag("@")).parse(authority_input)? } else { // No change to input, but let the compiler be happy (authority_input, authority_input) }; - // Should I parse token in here? - + // Break down userinfo into user and token let (user, token) = if let Some(userinfo) = userinfo { if userinfo.contains(":") { - let (_, (user, token)) = separated_pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - tag(":"), - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + let (_, (user, token)) = context( + "Userinfo with colon parser", + separated_pair( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + tag(":"), + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + ), ) .parse(userinfo)?; - //println!("user: {user:?}"); - //println!("token: {token:?}"); (Some(user), Some(token)) } else { - //println!("user: {userinfo:?}"); (Some(userinfo), None) } } else { @@ -238,7 +220,7 @@ impl<'a> GitUrl2<'a> { } pub fn parse_port(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { - opt(preceded(tag(":"), digit1)).parse(authority_input) + context("Port parser", opt(preceded(tag(":"), digit1))).parse(authority_input) } // This will get absolute paths. 
@@ -252,10 +234,13 @@ impl<'a> GitUrl2<'a> { Error = nom::error::Error<&'a str>, >{ // Starts with '/' or empty - recognize(many1(pair( - tag("/"), - take_while(|c: char| pchar_uri_chars(c)), - ))) + context( + "Path parser (abempty)", + recognize(many1(pair( + tag("/"), + take_while(|c: char| pchar_uri_chars(c)), + ))), + ) } pub fn path_ssh_parser( @@ -266,29 +251,16 @@ impl<'a> GitUrl2<'a> { >>::Output, Error = nom::error::Error<&'a str>, >{ - recognize(( - tag(":"), - take_while(|c: char| pchar_uri_chars(c)), - many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), - )) + context( + "Path parser (ssh)", + recognize(( + tag(":"), + take_while(|c: char| pchar_uri_chars(c)), + many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )), + ) } - //pub fn path_absolute_parser<'a>( - // &self, - //) -> impl Parser< - // &str, - // Output = > as Parser< - // &str, - // >>::Output, - // Error = nom::error::Error<&str>, - //> { - // // Starts with '/' but not "//" - // recognize(many1(pair( - // tag("/"), - // take_while(|c: char| pchar_uri_chars(c)), - // ))) - //} - pub fn path_rootless_parser( ) -> impl Parser< &'a str, @@ -297,23 +269,13 @@ impl<'a> GitUrl2<'a> { >>::Output, Error = nom::error::Error<&'a str>, >{ - recognize(pair( - take_while(|c: char| pchar_uri_chars(c)), - many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), - )) - } -} - -fn as_slice_bounds<'a>(original: &'a str, field: Option<&'a str>) -> Option<&'a str> { - if let Some(field) = field { - if let Some(index) = original.find_substring(field) { - //println!("scheme slice: {}", &original[index..(index+scheme.len())]); - Some(&original[index..(index + field.len())]) - } else { - None - } - } else { - None + context( + "Path parser (rootless)", + recognize(pair( + take_while(|c: char| pchar_uri_chars(c)), + many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )), + ) } } @@ -323,7 +285,7 @@ fn pchar_uri_chars(c: char) -> bool { } fn reg_name_uri_chars(c: 
char) -> bool { - // *( unreserved / pct-encoded / sub-delims ) + // *( unreserved / pct-encoded (not implemented) / sub-delims ) unreserved_uri_chars(c) || subdelims_uri_chars(c) } fn unreserved_uri_chars(c: char) -> bool { @@ -383,7 +345,7 @@ fn main() -> Result<(), GitUrlParseError> { ////println!("{:?}\n", parsed); let parsed = GitUrl2::parse(test_url).unwrap(); - println!("{parsed:?}"); + println!("{parsed:#?}"); //println!("{:?}", parsed.parse()); println!(""); } From 44118b6077e2b7da2b84b932d414ed47504c7c54 Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Fri, 5 Sep 2025 09:39:04 -0700 Subject: [PATCH 18/32] Swapping out structs --- src/lib.rs | 4 +- src/types/error.rs | 5 +- src/types/mod.rs | 1058 ++++++++++++++++++++++++------------- src/types/provider/mod.rs | 70 +-- tests/parse.rs | 406 +++++++------- 5 files changed, 949 insertions(+), 594 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 02d7e9b..2f5351d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,7 @@ pub mod types; // Re-exports -pub use types::{ - GenericProvider, GitProvider, GitUrl, GitUrlBuilder, GitUrlBuilderError, GitUrlParseError, -}; +pub use types::{GenericProvider, GitProvider, GitUrl, GitUrlParseError}; #[cfg(feature = "tracing")] use tracing::debug; diff --git a/src/types/error.rs b/src/types/error.rs index 7559815..4c45ae5 100644 --- a/src/types/error.rs +++ b/src/types/error.rs @@ -1,10 +1,9 @@ -use super::GitUrlBuilderError; use thiserror::Error; #[derive(Error, Debug)] pub enum GitUrlParseError { - #[error("Error from derive_builder")] - DeriveBuilderError(#[from] GitUrlBuilderError), + //#[error("Error from derive_builder")] + //DeriveBuilderError(#[from] GitUrlOldBuilderError), //#[error("Error from Url crate: {0}")] //UrlParseError(#[from] url::ParseError), diff --git a/src/types/mod.rs b/src/types/mod.rs index e2e6af9..4579c2a 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -9,43 +9,49 @@ use std::fmt; use std::str::FromStr; use derive_builder::Builder; 
-use getset::{Getters, Setters}; +use getset::{CloneGetters, CopyGetters, Getters, Setters}; +use nom::Finish; use nom::branch::alt; use nom::bytes::complete::{tag, take_till, take_until, take_while}; use nom::character::complete::one_of; -use nom::sequence::{preceded, terminated}; +use nom::character::complete::{alpha1, digit1}; +use nom::combinator::{peek, recognize, verify}; +use nom::error::context; +use nom::multi::{many0, many1}; +use nom::sequence::{pair, preceded, separated_pair, terminated}; use nom::{IResult, Parser, combinator::opt, combinator::rest}; + use strum::{Display, EnumString, VariantNames}; #[cfg(feature = "tracing")] use tracing::debug; use typed_path::{Utf8TypedPath, Utf8TypedPathBuf}; -// todo: let's get rid of this -/// Supported uri schemes for parsing -#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] -#[strum(serialize_all = "kebab_case")] -pub(crate) enum Scheme { - /// Represents `file://` url scheme - File, - /// Represents `ftp://` url scheme - Ftp, - /// Represents `ftps://` url scheme - Ftps, - /// Represents `git://` url scheme - Git, - /// Represents `git+ssh://` url scheme - #[strum(serialize = "git+ssh")] - GitSsh, - /// Represents `http://` url scheme - Http, - /// Represents `https://` url scheme - Https, - /// Represents `ssh://` url scheme - Ssh, - ///// Represents No url scheme - //Unspecified, - Other(String), // todo: need test for this -} +//// todo: let's get rid of this +///// Supported uri schemes for parsing +//#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] +//#[strum(serialize_all = "kebab_case")] +//pub(crate) enum Scheme { +// /// Represents `file://` url scheme +// File, +// /// Represents `ftp://` url scheme +// Ftp, +// /// Represents `ftps://` url scheme +// Ftps, +// /// Represents `git://` url scheme +// Git, +// /// Represents `git+ssh://` url scheme +// #[strum(serialize = "git+ssh")] +// GitSsh, +// /// Represents `http://` url scheme +// Http, +// /// 
Represents `https://` url scheme +// Https, +// /// Represents `ssh://` url scheme +// Ssh, +// ///// Represents No url scheme +// //Unspecified, +// Other(String), // todo: need test for this +//} #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) enum GitUrlParseHint { @@ -56,383 +62,721 @@ pub(crate) enum GitUrlParseHint { Httplike, } -/// GitUrl represents an input url that is a url used by git -/// Internally during parsing the url is sanitized and uses the `url` crate to perform -/// the majority of the parsing effort, and with some extra handling to expose -/// metadata used my many git hosting services -#[derive(Debug, PartialEq, Eq, Clone, Builder, Getters, Setters)] -#[builder(build_fn(validate = "Self::prebuild_check"), field(public))] -#[get = "pub"] -pub struct GitUrl

-where - P: GitProvider, +///// GitUrl represents an input url that is a url used by git +///// Internally during parsing the url is sanitized and uses the `url` crate to perform +///// the majority of the parsing effort, and with some extra handling to expose +///// metadata used my many git hosting services +//#[derive(Debug, PartialEq, Eq, Clone, Builder, Getters, Setters)] +//#[builder(build_fn(validate = "Self::prebuild_check"), field(public))] +//#[get = "pub"] +//pub struct GitUrlOld

+//where +// P: GitProvider, +//{ +// /// The host, domain or IP of the repo +// #[builder(setter(into, strip_option), default)] +// host: Option, +// /// The url scheme +// #[builder(setter(into, strip_option), default)] +// scheme: Option, +// /// Authentication user +// #[builder(setter(into, strip_option), default)] +// #[getset(set = "pub(crate)")] +// user: Option, +// /// Authentication token (could appear in the https urls) +// #[builder(setter(into, strip_option), default)] +// #[getset(set = "pub(crate)")] +// token: Option, +// /// The port where git service is hosted +// #[builder(setter(into, strip_option), default)] +// port: Option, +// /// The path to repo w/ respect to user + hostname +// #[builder(setter(into))] +// path: Utf8TypedPathBuf, +// /// Include scheme:// when printing url +// #[builder(default)] +// print_scheme: bool, +// /// Hosted git provider info derived from GitUrl +// #[builder(setter(into, strip_option), default)] +// provider: Option

, +//} + +//impl> GitUrlOldBuilder

{ +// pub fn trim_auth(&mut self) { +// self.user = None; +// self.token = None; +// } +// +// fn prebuild_check(&self) -> Result<(), String> { +// //#[cfg(feature = "tracing")] +// //debug!("Processing: {:?}", &url); +// +// // Error if there are null bytes within the url +// +// // https://github.com/tjtelan/git-url-parse-rs/issues/16 +// if let Some(Some(host)) = &self.host { +// if host.contains('\0') { +// return Err(GitUrlParseError::FoundNullBytes.to_string()); +// } +// +// if host.is_empty() { +// return Err( +// GitUrlParseError::UnexpectedEmptyValue(String::from("host")).to_string() +// ); +// } +// } +// +// if let Some(Some(user)) = &self.user { +// if user.contains('\0') { +// return Err(GitUrlParseError::FoundNullBytes.to_string()); +// } +// +// if user.is_empty() { +// return Err( +// GitUrlParseError::UnexpectedEmptyValue(String::from("user")).to_string() +// ); +// } +// } +// +// if let Some(Some(token)) = &self.token { +// if token.contains('\0') { +// return Err(GitUrlParseError::FoundNullBytes.to_string()); +// } +// +// if token.is_empty() { +// return Err( +// GitUrlParseError::UnexpectedEmptyValue(String::from("token")).to_string(), +// ); +// } +// } +// +// if let Some(path) = &self.path { +// if path.as_str().contains('\0') { +// return Err(GitUrlParseError::FoundNullBytes.to_string()); +// } +// if path.as_str().is_empty() { +// return Err( +// GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() +// ); +// } +// } +// +// Ok(()) +// } +// +// fn parse(url: &str) -> Result { +// debug!("{url}"); +// let mut giturl = GitUrlOldBuilder::default(); +// let mut working_url = url; +// let mut hint = GitUrlParseHint::default(); +// +// giturl.parse_scheme(&mut working_url, &mut hint); +// giturl.parse_auth_info(&mut working_url, &mut hint); +// let save_state = working_url; +// +// giturl.parse_host_port(&mut working_url, &mut hint); +// +// match hint { +// GitUrlParseHint::Httplike => { +// if 
working_url.starts_with(":") && giturl.port.is_none() { +// return Err(GitUrlParseError::UnexpectedFormat); +// } else { +// println!("Nothing wrong here: {working_url}"); +// } +// } +// GitUrlParseHint::Sshlike => { +// giturl.parse_ssh_path(&mut working_url, &mut hint); +// } +// GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { +// working_url = save_state; +// giturl.host = None; +// giturl.scheme(Scheme::File); +// } +// } +// +// giturl.parse_path(&mut working_url, &mut hint); +// +// Ok(giturl) +// } +// +// fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { +// let mut builder = self.clone(); +// +// if let Ok((leftover, Some(s))) = GitUrlOldBuilder::

::_parse_scheme(working_url) { +// println!("leftover: {leftover}, scheme: {s:?}"); +// +// let scheme = Scheme::from_str(s).expect("Unknown scheme"); +// +// *hint = match &scheme { +// Scheme::Ssh => GitUrlParseHint::Sshlike, +// Scheme::File => GitUrlParseHint::Filelike, +// _ => GitUrlParseHint::Httplike, +// }; +// +// builder.scheme(scheme); +// builder.print_scheme(true); +// +// *self = builder; +// *working_url = leftover; +// } +// } +// +// fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { +// let mut builder = self.clone(); +// if let Ok((leftover, Some(username))) = GitUrlOldBuilder::

::_parse_username(working_url) +// { +// println!("leftover: {leftover}, username: {username:?}"); +// builder.user(username); +// +// if *hint == GitUrlParseHint::Unknown { +// *hint = GitUrlParseHint::Sshlike; +// } +// +// if let Ok((token, Some(real_username))) = GitUrlOldBuilder::

::_parse_token(username) +// { +// println!("token: {token}, real_username: {real_username:?}"); +// builder.user(real_username); +// builder.token(token); +// +// if *hint == GitUrlParseHint::Unknown || *hint == GitUrlParseHint::Sshlike { +// *hint = GitUrlParseHint::Httplike; +// } +// } +// +// *working_url = leftover; +// *self = builder; +// } +// } +// +// fn parse_host_port(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { +// let mut builder = self.clone(); +// let mut save = working_url.clone(); +// +// if let Ok((leftover, Some(hostname))) = GitUrlOldBuilder::

::_parse_hostname(save) { +// println!("leftover {leftover}, hostname: {hostname}"); +// builder.host(hostname); +// save = leftover; +// } +// +// if let Ok((leftover, Some(port))) = GitUrlOldBuilder::

::_parse_port(save) { +// if !port.is_empty() { +// println!("leftover {leftover}, port: {port}"); +// builder.port(u16::from_str(port).expect("Not a valid port")); +// save = leftover; +// +// // If we're currently uncertain, but we've found a port +// // our guess is this more likely is an http url than an ssh url +// // Add the `ssh://` scheme to the url if this is incorrect +// if *hint == GitUrlParseHint::Unknown { +// *hint = GitUrlParseHint::Httplike; +// } +// } +// } +// +// // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports +// if builder.port.is_none() && save.starts_with(":") { +// *hint = GitUrlParseHint::Sshlike; +// } +// +// *self = builder; +// *working_url = save; +// } +// +// fn parse_ssh_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { +// let mut builder = self.clone(); +// +// if let Ok((_leftover, Some(path))) = GitUrlOldBuilder::

::_parse_ssh_path(working_url) { +// builder.scheme(Scheme::Ssh); +// +// *self = builder; +// *working_url = path; +// } +// } +// +// fn parse_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { +// let mut builder = self.clone(); +// if let Ok((leftover, path)) = GitUrlOldBuilder::

::_parse_path(working_url) { +// println!("leftover {leftover}, path: {path}"); +// +// let parsed_path = Utf8TypedPath::derive(path).to_path_buf(); +// builder.path(parsed_path); +// +// *self = builder; +// *working_url = leftover; +// } +// } +// +// //// +// +// fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { +// opt(terminated( +// alt(( +// // Fancy: Can I build an iter map on this? +// tag(Scheme::File.to_string().as_bytes()), +// tag(Scheme::Ftps.to_string().as_bytes()), +// tag(Scheme::Ftp.to_string().as_bytes()), +// tag(Scheme::GitSsh.to_string().as_bytes()), +// tag(Scheme::Git.to_string().as_bytes()), +// tag(Scheme::Https.to_string().as_bytes()), +// tag(Scheme::Http.to_string().as_bytes()), +// tag(Scheme::Ssh.to_string().as_bytes()), +// // todo: Other(), needs a test +// )), +// tag("://"), +// )) +// .parse(input) +// } +// +// fn _parse_username(input: &str) -> IResult<&str, Option<&str>> { +// opt(terminated(take_until("@"), tag("@"))).parse(input) +// } +// +// fn _parse_token(input: &str) -> IResult<&str, Option<&str>> { +// opt(terminated(take_until(":"), tag(":"))).parse(input) +// } +// +// fn _parse_hostname(input: &str) -> IResult<&str, Option<&str>> { +// opt(take_till(|c| c == '/' || c == ':')).parse(input) +// } +// +// fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { +// opt(preceded(tag(":"), take_while(|c: char| c.is_ascii_digit()))).parse(input) +// } +// +// // This is making an assumption that the path is relative, not absolute +// // This is bc we do not support absolute paths when we also have a port +// fn _parse_ssh_path(input: &str) -> IResult<&str, Option<&str>> { +// opt(preceded(one_of("/:"), rest)).parse(input) +// } +// +// fn _parse_path(input: &str) -> IResult<&str, &str> { +// rest(input) +// } +//} + +///// Build the printable GitUrl from its components +//impl fmt::Display for GitUrlOld { +// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +// let scheme = if let Some(scheme) = 
&self.scheme() +// && *self.print_scheme() +// { +// format!("{scheme}://") +// } else { +// String::new() +// }; +// +// let auth_info = match self.scheme() { +// Some(Scheme::Ssh) | Some(Scheme::Git) | Some(Scheme::GitSsh) => { +// if let Some(user) = &self.user() { +// format!("{user}@") +// } else { +// String::new() +// } +// } +// Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user(), &self.token()) { +// (Some(user), Some(token)) => format!("{user}:{token}@"), +// (Some(user), None) => format!("{user}@",), +// (None, Some(token)) => format!("{token}@"), +// (None, None) => String::new(), +// }, +// _ => String::new(), +// }; +// +// let host = match &self.host() { +// Some(host) => host.to_string(), +// None => String::new(), +// }; +// +// let port = match &self.port() { +// Some(p) => format!(":{p}",), +// None => String::new(), +// }; +// +// let path = if self.scheme().clone() == Some(Scheme::Ssh) { +// if self.port().is_some() { +// if !self.path().as_str().starts_with('/') { +// format!("/{}", &self.path()) +// } else { +// self.path().to_string() +// } +// } else { +// format!(":{}", &self.path()) +// } +// } else { +// self.path().to_string() +// }; +// +// let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); +// +// write!(f, "{git_url_str}",) +// } +//} +// +//impl FromStr for GitUrlOld { +// //type Err = GitUrlParseError; +// type Err = GitUrlOldBuilderError; +// +// fn from_str(s: &str) -> Result { +// GitUrlOld::parse(s) +// } +//} +// +//impl GitUrlOld { +// /// Returns `GitUrl` after removing `user` and `token` values +// /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info +// pub fn trim_auth(&self) -> GitUrlOld { +// let mut new_giturl = self.clone(); +// new_giturl.set_user(None); +// new_giturl.set_token(None); +// new_giturl +// } +// +// /// Returns a `Result` after normalizing and parsing `url` for metadata +// pub fn parse(url: &str) -> Result { +// let giturl = 
GitUrlOldBuilder::parse(url).unwrap(); +// giturl.build() +// } +// +// pub fn provider_info(&self) -> Result +// where +// T: GitProvider, +// { +// T::from_git_url(self) +// } +//} + +#[derive(Clone, Debug, CopyGetters, CloneGetters, Setters, Default, PartialEq, Eq)] +//pub struct GitUrl<'a, P = GenericProvider> +pub struct GitUrl<'a> +//where +// P: GitProvider, GitUrlParseError>, { - /// The host, domain or IP of the repo - #[builder(setter(into, strip_option), default)] - host: Option, - /// The url scheme - #[builder(setter(into, strip_option), default)] - scheme: Option, - /// Authentication user - #[builder(setter(into, strip_option), default)] - #[getset(set = "pub(crate)")] - user: Option, - /// Authentication token (could appear in the https urls) - #[builder(setter(into, strip_option), default)] - #[getset(set = "pub(crate)")] - token: Option, - /// The port where git service is hosted - #[builder(setter(into, strip_option), default)] - port: Option, - /// The path to repo w/ respect to user + hostname - #[builder(setter(into))] - path: Utf8TypedPathBuf, + #[getset(get_clone = "pub", set = "pub(crate)")] + url: String, + #[getset(get_copy = "pub", set = "pub(crate)")] + scheme: Option<&'a str>, + #[getset(get_copy = "pub", set = "pub(crate)")] + user: Option<&'a str>, + #[getset(get_copy = "pub", set = "pub(crate)")] + token: Option<&'a str>, + #[getset(get_copy = "pub")] + host: Option<&'a str>, + #[getset(get_copy = "pub")] + port: Option<&'a str>, + #[getset(get_copy = "pub")] + path: Option<&'a str>, /// Include scheme:// when printing url - #[builder(default)] + #[getset(get_copy = "pub")] print_scheme: bool, - /// Hosted git provider info derived from GitUrl - #[builder(setter(into, strip_option), default)] - provider: Option

, + ///// Hosted git provider info derived from GitUrl + //#[getset(skip)] + //provider: Option

, } -impl> GitUrlBuilder

{ - pub fn trim_auth(&mut self) { - self.user = None; - self.token = None; +impl<'a> GitUrl<'a> { + /// Returns `GitUrl` after removing `user` and `token` values + /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info + pub fn trim_auth(&self) -> GitUrl { + let mut new_giturl = self.clone(); + new_giturl.set_user(None); + new_giturl.set_token(None); + new_giturl } - - fn prebuild_check(&self) -> Result<(), String> { - //#[cfg(feature = "tracing")] - //debug!("Processing: {:?}", &url); - + // https://datatracker.ietf.org/doc/html/rfc3986 + // Based on rfc3986, but does not strictly cover the spec + // * No support for: + // * query, fragment, percent-encoding, and much of the edges for path support + // * many forms of ip representations like ipv6, hexdigits + // * Added support for: + // * parsing ssh git urls which use ":" as a delimiter between the authority and path + // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) + // * some limited support for windows/linux filepaths + pub fn parse(input: &'a str) -> Result { // Error if there are null bytes within the url - // https://github.com/tjtelan/git-url-parse-rs/issues/16 - if let Some(Some(host)) = &self.host { - if host.contains('\0') { - return Err(GitUrlParseError::FoundNullBytes.to_string()); - } - - if host.is_empty() { - return Err( - GitUrlParseError::UnexpectedEmptyValue(String::from("host")).to_string() - ); - } - } - - if let Some(Some(user)) = &self.user { - if user.contains('\0') { - return Err(GitUrlParseError::FoundNullBytes.to_string()); - } - - if user.is_empty() { - return Err( - GitUrlParseError::UnexpectedEmptyValue(String::from("user")).to_string() - ); - } - } - - if let Some(Some(token)) = &self.token { - if token.contains('\0') { - return Err(GitUrlParseError::FoundNullBytes.to_string()); - } - - if token.is_empty() { - return Err( - GitUrlParseError::UnexpectedEmptyValue(String::from("token")).to_string(), - ); 
- } + if input.contains('\0') { + return Err(GitUrlParseError::FoundNullBytes); } - if let Some(path) = &self.path { - if path.as_str().contains('\0') { - return Err(GitUrlParseError::FoundNullBytes.to_string()); - } - if path.as_str().is_empty() { - return Err( - GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() - ); - } - } - - Ok(()) - } - - fn parse(url: &str) -> Result { - debug!("{url}"); - let mut giturl = GitUrlBuilder::default(); - let mut working_url = url; - let mut hint = GitUrlParseHint::default(); - - giturl.parse_scheme(&mut working_url, &mut hint); - giturl.parse_auth_info(&mut working_url, &mut hint); - let save_state = working_url; - - giturl.parse_host_port(&mut working_url, &mut hint); - - match hint { - GitUrlParseHint::Httplike => { - if working_url.starts_with(":") && giturl.port.is_none() { - return Err(GitUrlParseError::UnexpectedFormat); - } else { - println!("Nothing wrong here: {working_url}"); - } - } - GitUrlParseHint::Sshlike => { - giturl.parse_ssh_path(&mut working_url, &mut hint); - } - GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { - working_url = save_state; - giturl.host = None; - giturl.scheme(Scheme::File); - } - } - - giturl.parse_path(&mut working_url, &mut hint); - - Ok(giturl) - } - - fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - - if let Ok((leftover, Some(s))) = GitUrlBuilder::

::_parse_scheme(working_url) { - println!("leftover: {leftover}, scheme: {s:?}"); - - let scheme = Scheme::from_str(s).expect("Unknown scheme"); - - *hint = match &scheme { - Scheme::Ssh => GitUrlParseHint::Sshlike, - Scheme::File => GitUrlParseHint::Filelike, - _ => GitUrlParseHint::Httplike, - }; - - builder.scheme(scheme); - builder.print_scheme(true); - - *self = builder; - *working_url = leftover; - } - } - - fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - if let Ok((leftover, Some(username))) = GitUrlBuilder::

::_parse_username(working_url) { - println!("leftover: {leftover}, username: {username:?}"); - builder.user(username); - - if *hint == GitUrlParseHint::Unknown { - *hint = GitUrlParseHint::Sshlike; - } - - if let Ok((token, Some(real_username))) = GitUrlBuilder::

::_parse_token(username) { - println!("token: {token}, real_username: {real_username:?}"); - builder.user(real_username); - builder.token(token); - - if *hint == GitUrlParseHint::Unknown || *hint == GitUrlParseHint::Sshlike { - *hint = GitUrlParseHint::Httplike; - } - } - - *working_url = leftover; - *self = builder; - } - } + let original = input; - fn parse_host_port(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - let mut save = working_url.clone(); + let (input, scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); + let (_input, heir_part) = Self::parse_hier_part(input).finish().unwrap_or_default(); - if let Ok((leftover, Some(hostname))) = GitUrlBuilder::

::_parse_hostname(save) { - println!("leftover {leftover}, hostname: {hostname}"); - builder.host(hostname); - save = leftover; - } + let (user_opt, token_opt) = heir_part.0.0; + let host_opt = heir_part.0.1; + let port_opt = heir_part.0.2; + let path_opt = heir_part.1; - if let Ok((leftover, Some(port))) = GitUrlBuilder::

::_parse_port(save) { - if !port.is_empty() { - println!("leftover {leftover}, port: {port}"); - builder.port(u16::from_str(port).expect("Not a valid port")); - save = leftover; - - // If we're currently uncertain, but we've found a port - // our guess is this more likely is an http url than an ssh url - // Add the `ssh://` scheme to the url if this is incorrect - if *hint == GitUrlParseHint::Unknown { - *hint = GitUrlParseHint::Httplike; - } + // This needs another pass + let provider = if let Some(scheme) = scheme { + if scheme == "http" || scheme == "https" || scheme == "ssh" { + Some(GenericProvider::default()) + } else { + None } - } - - // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports - if builder.port.is_none() && save.starts_with(":") { - *hint = GitUrlParseHint::Sshlike; - } + } else { + None + }; - *self = builder; - *working_url = save; + Ok(GitUrl { + url: original.to_string(), + scheme, + user: user_opt, + token: token_opt, + host: host_opt, + port: port_opt, + path: path_opt, + print_scheme: scheme.is_some(), + //provider + }) } - fn parse_ssh_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - - if let Ok((_leftover, Some(path))) = GitUrlBuilder::

::_parse_ssh_path(working_url) { - builder.scheme(Scheme::Ssh); + pub fn parse_scheme(input: &'a str) -> IResult<&'a str, Option<&'a str>> { + let mut check = context( + "scheme validate", + peek(pair( + pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + ), + tag::<&str, &str, nom::error::Error<&str>>("://"), + )), + ); - *self = builder; - *working_url = path; + if check.parse(input).is_err() { + return Ok((input, None)); } - } - - fn parse_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { - let mut builder = self.clone(); - if let Ok((leftover, path)) = GitUrlBuilder::

::_parse_path(working_url) { - println!("leftover {leftover}, path: {path}"); - let parsed_path = Utf8TypedPath::derive(path).to_path_buf(); - builder.path(parsed_path); - - *self = builder; - *working_url = leftover; - } + // Must start with alpha character, then alpha/digit/+/-/. + context( + "Scheme parse", + opt(terminated( + recognize(pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + )), + // Not part of spec. We consume the "://" here to more easily manage scheme to be optional + tag("://"), + )), + ) + .parse(input) } - //// - - fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated( + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 + // The rfc says parsing the "//" part of the uri belongs to the hier-part parsing + // but we only support common internet protocols, file paths, but not other "baseless" ones + // so it is sensible for this move it with scheme parsing to support git user service urls + pub fn parse_hier_part( + input: &'a str, + ) -> IResult< + &'a str, + ( + ((Option<&str>, Option<&str>), Option<&str>, Option<&str>), + Option<&'a str>, + ), + > { + let (input, authority) = Self::parse_authority(input)?; + //println!("authority: {authority:?}"); + + let (input, part) = context( + "Top of path parsers", alt(( - // Fancy: Can I build an iter map on this? 
- tag(Scheme::File.to_string().as_bytes()), - tag(Scheme::Ftps.to_string().as_bytes()), - tag(Scheme::Ftp.to_string().as_bytes()), - tag(Scheme::GitSsh.to_string().as_bytes()), - tag(Scheme::Git.to_string().as_bytes()), - tag(Scheme::Https.to_string().as_bytes()), - tag(Scheme::Http.to_string().as_bytes()), - tag(Scheme::Ssh.to_string().as_bytes()), - // todo: Other(), needs a test + //preceded(tag("//"), Self::path_abempty_parser()), + Self::path_abempty_parser(), + Self::path_rootless_parser(), + Self::path_ssh_parser(), )), - tag("://"), - )) - .parse(input) - } + ) + .parse(input)?; - fn _parse_username(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated(take_until("@"), tag("@"))).parse(input) + Ok((input, (authority, Some(part)))) } - fn _parse_token(input: &str) -> IResult<&str, Option<&str>> { - opt(terminated(take_until(":"), tag(":"))).parse(input) - } + pub fn parse_authority( + input: &'a str, + ) -> IResult<&'a str, ((Option<&str>, Option<&str>), Option<&str>, Option<&str>)> { + let original = input; + + // Optional: username / token + let (input, userinfo) = Self::parse_userinfo(input)?; + + // Host + let (input, host) = context( + "Host parser", + opt(verify( + recognize(take_while(|c: char| reg_name_uri_chars(c))), + |s: &str| { + let has_alphanum = s.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); + let starts_with_alphanum = s.chars().next().is_some_and(|c| is_alphanum(c)); + + has_alphanum && starts_with_alphanum + }, + )), + ) + .parse(input)?; - fn _parse_hostname(input: &str) -> IResult<&str, Option<&str>> { - opt(take_till(|c| c == '/' || c == ':')).parse(input) - } + // Optional: port + let (input, port) = Self::parse_port(input)?; - fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(tag(":"), take_while(|c: char| c.is_ascii_digit()))).parse(input) + Ok((input, (userinfo, host, port))) } - // This is making an assumption that the path is relative, not absolute - // This is bc we do not support 
absolute paths when we also have a port - fn _parse_ssh_path(input: &str) -> IResult<&str, Option<&str>> { - opt(preceded(one_of("/:"), rest)).parse(input) - } + pub fn parse_userinfo( + authority_input: &'a str, + ) -> IResult<&'a str, (Option<&'a str>, Option<&'a str>)> { + // Peek for username@ + let mut check = context( + "Userinfo validation", + peek(pair( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), + tag::<&str, &str, nom::error::Error<&str>>("@"), + )), + ); - fn _parse_path(input: &str) -> IResult<&str, &str> { - rest(input) - } -} + if check.parse(authority_input).is_err() { + return Ok((authority_input, (None, None))); + } -/// Build the printable GitUrl from its components -impl fmt::Display for GitUrl { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let scheme = if let Some(scheme) = &self.scheme() - && *self.print_scheme() - { - format!("{scheme}://") + // Userinfo + let (authority_input, userinfo) = context( + "Userinfo parser", + opt(recognize(take_while(|c: char| { + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' + }))), + ) + .parse(authority_input)?; + + let (authority_input, _) = if userinfo.is_some() { + context("Userinfo '@' parser", tag("@")).parse(authority_input)? 
} else { - String::new() + // No change to input, but let the compiler be happy + (authority_input, authority_input) }; - let auth_info = match self.scheme() { - Some(Scheme::Ssh) | Some(Scheme::Git) | Some(Scheme::GitSsh) => { - if let Some(user) = &self.user() { - format!("{user}@") - } else { - String::new() - } - } - Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user(), &self.token()) { - (Some(user), Some(token)) => format!("{user}:{token}@"), - (Some(user), None) => format!("{user}@",), - (None, Some(token)) => format!("{token}@"), - (None, None) => String::new(), - }, - _ => String::new(), - }; - - let host = match &self.host() { - Some(host) => host.to_string(), - None => String::new(), - }; - - let port = match &self.port() { - Some(p) => format!(":{p}",), - None => String::new(), - }; - - let path = if self.scheme().clone() == Some(Scheme::Ssh) { - if self.port().is_some() { - if !self.path().as_str().starts_with('/') { - format!("/{}", &self.path()) - } else { - self.path().to_string() - } + // Break down userinfo into user and token + let (user, token) = if let Some(userinfo) = userinfo { + if userinfo.contains(":") { + let (_, (user, token)) = context( + "Userinfo with colon parser", + separated_pair( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + tag(":"), + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + ), + ) + .parse(userinfo)?; + (Some(user), Some(token)) } else { - format!(":{}", &self.path()) + (Some(userinfo), None) } } else { - self.path().to_string() + (None, None) }; - let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); - - write!(f, "{git_url_str}",) + Ok((authority_input, (user, token))) } -} -impl FromStr for GitUrl { - //type Err = GitUrlParseError; - type Err = GitUrlBuilderError; - - fn from_str(s: &str) -> Result { - GitUrl::parse(s) + pub fn parse_port(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { + context("Port parser", 
opt(preceded(tag(":"), digit1))).parse(authority_input) } -} -impl GitUrl { - /// Returns `GitUrl` after removing `user` and `token` values - /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info - pub fn trim_auth(&self) -> GitUrl { - let mut new_giturl = self.clone(); - new_giturl.set_user(None); - new_giturl.set_token(None); - new_giturl + // This will get absolute paths. + // todo: test for empty and start with "//" + pub fn path_abempty_parser( + ) -> impl Parser< + &'a str, + Output = > as Parser< + &'a str, + >>::Output, + Error = nom::error::Error<&'a str>, + >{ + // Starts with '/' or empty + context( + "Path parser (abempty)", + recognize(many1(pair( + tag("/"), + take_while(|c: char| pchar_uri_chars(c)), + ))), + ) } - /// Returns a `Result` after normalizing and parsing `url` for metadata - pub fn parse(url: &str) -> Result { - let giturl = GitUrlBuilder::parse(url).unwrap(); - giturl.build() + pub fn path_ssh_parser( + ) -> impl Parser< + &'a str, + Output = > as Parser< + &'a str, + >>::Output, + Error = nom::error::Error<&'a str>, + >{ + context( + "Path parser (ssh)", + recognize(( + tag(":"), + take_while(|c: char| pchar_uri_chars(c)), + many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )), + ) } - pub fn provider_info(&self) -> Result - where - T: GitProvider, - { - T::from_git_url(self) + pub fn path_rootless_parser( + ) -> impl Parser< + &'a str, + Output = > as Parser< + &'a str, + >>::Output, + Error = nom::error::Error<&'a str>, + >{ + context( + "Path parser (rootless)", + recognize(pair( + take_while(|c: char| pchar_uri_chars(c)), + many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )), + ) } } + +fn pchar_uri_chars(c: char) -> bool { + // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' +} + +fn reg_name_uri_chars(c: char) -> bool { + // *( unreserved / pct-encoded (not 
implemented) / sub-delims ) + unreserved_uri_chars(c) || subdelims_uri_chars(c) +} +fn unreserved_uri_chars(c: char) -> bool { + is_alphanum(c) || c == '-' || c == '.' || c == '_' || c == '~' +} + +fn is_alphanum(c: char) -> bool { + c.is_ascii_alphabetic() || c.is_ascii_digit() +} + +fn subdelims_uri_chars(c: char) -> bool { + c == '!' + || c == '$' + || c == '&' + || c == '\'' + || c == '(' + || c == ')' + || c == '*' + || c == '+' + || c == ',' + || c == ';' + || c == '=' + || c == '\\' // This is not part of spec, but used for windows paths +} diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 56618ef..24afd07 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -30,19 +30,19 @@ impl GenericProvider { } } -impl GitProvider for GenericProvider { +impl GitProvider, GitUrlParseError> for GenericProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((user, repo)))), Some(host)) = ( - GenericProvider::_get_owner_repo(url.path().as_str()), - url.host(), - ) { - Ok(GenericProvider { - host: host.clone(), - owner: String::from(user), - repo: String::from(repo), - }) + if let (Some(path), Some(host)) = (url.path(), url.host()) { + if let Ok((_, Some((user, repo)))) = GenericProvider::_get_owner_repo(path) { + Ok(GenericProvider { + host: host.to_string(), + owner: user.to_string(), + repo: repo.to_string(), + }) + } else { + Err(GitUrlParseError::UnexpectedFormat) + } } else { - // TODO: Check this error type later Err(GitUrlParseError::UnexpectedFormat) } } @@ -63,20 +63,20 @@ impl AzureDevOpsProvider { } } -impl GitProvider for AzureDevOpsProvider { +impl GitProvider, GitUrlParseError> for AzureDevOpsProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((user, repo)))), Some(host)) = ( - AzureDevOpsProvider::_get_user_repo(url.path().as_str()), - url.host(), - ) { - Ok(AzureDevOpsProvider { - host: host.clone(), - org: String::from(""), - project: String::from(user), - repo: 
String::from(repo), - }) + if let (Some(path), Some(host)) = (url.path(), url.host()) { + if let Ok((_, Some((user, repo)))) = AzureDevOpsProvider::_get_user_repo(path) { + Ok(AzureDevOpsProvider { + host: host.to_string(), + org: String::from(""), + project: String::from(user), + repo: String::from(repo), + }) + } else { + Err(GitUrlParseError::UnexpectedFormat) + } } else { - // TODO: Check this error type later Err(GitUrlParseError::UnexpectedFormat) } } @@ -97,20 +97,20 @@ impl GitLabProvider { } } -impl GitProvider for GitLabProvider { +impl GitProvider, GitUrlParseError> for GitLabProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Ok((_, Some((_user, repo)))), Some(host)) = ( - GitLabProvider::_get_user_repo(url.path().as_str()), - url.host(), - ) { - Ok(GitLabProvider { - host: host.clone(), - user: String::from(""), - subgroup: None, - repo: String::from(repo), - }) + if let (Some(path), Some(host)) = (url.path(), url.host()) { + if let Ok((_, Some((user, repo)))) = GitLabProvider::_get_user_repo(path) { + Ok(GitLabProvider { + host: host.to_string(), + user: String::from(""), + subgroup: None, + repo: String::from(repo), + }) + } else { + Err(GitUrlParseError::UnexpectedFormat) + } } else { - // TODO: Check this error type later Err(GitUrlParseError::UnexpectedFormat) } } diff --git a/tests/parse.rs b/tests/parse.rs index ef21762..d4d5a4d 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -3,31 +3,33 @@ use git_url_parse::*; fn ssh_user_ports() { let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme("ssh".into()) - .host("host.tld") - .user("git") - .port(9999 as u16) - .path("user/project-name.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!( + parsed.url(), + "ssh://git@host.tld:9999/user/project-name.git" + ); + assert_eq!(parsed.scheme(), Some("ssh")); + 
assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("host.tld")); + assert_eq!(parsed.port(), Some("9999")); + assert_eq!(parsed.path(), Some("user/project-name.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn ssh_no_scheme_no_user() { let test_url = "host.tld:user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Ssh) - .host("host.tld") - .path("user/project-name.git") - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "host.tld:user/project-name.git"); + assert_eq!(parsed.scheme(), Some("ssh")); + assert_eq!(parsed.user(), None); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("host.tld")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("user/project-name.git")); + assert_eq!(parsed.print_scheme(), false); } // Specific service support @@ -35,185 +37,192 @@ fn ssh_no_scheme_no_user() { fn https_user_bitbucket() { let test_url = "https://user@bitbucket.org/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Https) - .host("bitbucket.org") - .user("user") - .path("/user/repo.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "https://user@bitbucket.org/user/repo.git"); + assert_eq!(parsed.scheme(), Some("https")); + assert_eq!(parsed.user(), Some("user")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("bitbucket.org")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/user/repo.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn ssh_user_bitbucket() { let test_url = "git@bitbucket.org:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - 
.scheme(Scheme::Ssh) - .host("bitbucket.org") - .user("git") - .path("user/repo.git") - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "git@bitbucket.org:user/repo.git"); + assert_eq!(parsed.scheme(), Some("ssh")); + assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("bitbucket.org")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("user/repo.git")); + assert_eq!(parsed.print_scheme(), false); } #[test] fn https_user_auth_bitbucket() { let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Https) - .host("bitbucket.org") - .user("x-token-auth") - .token("token") - .path("/owner/name.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!( + parsed.url(), + "https://x-token-auth:token@bitbucket.org/owner/name.git" + ); + assert_eq!(parsed.scheme(), Some("https")); + assert_eq!(parsed.user(), Some("x-token-auth")); + assert_eq!(parsed.token(), Some("token")); + assert_eq!(parsed.host(), Some("bitbucket.org")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/owner/name.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn https_user_github() { let test_url = "https://user@github.com/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Https) - .user("user") - .host("github.com") - .path("/user/repo.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "https://user@github.com/user/repo.git"); + assert_eq!(parsed.scheme(), Some("https")); + assert_eq!(parsed.user(), Some("user")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("github.com")); + assert_eq!(parsed.port(), 
None); + assert_eq!(parsed.path(), Some("/user/repo.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn ssh_user_github() { let test_url = "git@github.com:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Ssh) - .user("git") - .host("github.com") - .path("user/repo.git") - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "git@github.com:user/repo.git"); + assert_eq!(parsed.scheme(), Some("ssh")); + assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("github.com")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("user/repo.git")); + assert_eq!(parsed.print_scheme(), false); } #[test] fn https_user_auth_github() { let test_url = "https://token:x-oauth-basic@github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Https) - .user("token") - .token("x-oauth-basic") - .host("github.com") - .path("/owner/name.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!( + parsed.url(), + "https://token:x-oauth-basic@github.com/owner/name.git" + ); + assert_eq!(parsed.scheme(), Some("https")); + assert_eq!(parsed.user(), Some("token")); + assert_eq!(parsed.token(), Some("x-oauth-basic")); + assert_eq!(parsed.host(), Some("github.com")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/owner/name.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn ssh_user_azure_devops() { let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Ssh) - .user("git") - .host("ssh.dev.azure.com") - .path("v3/CompanyName/ProjectName/RepoName") - .build() - .unwrap(); - - 
assert_eq!(parsed, expected); + + assert_eq!( + parsed.url(), + "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName" + ); + assert_eq!(parsed.scheme(), Some("ssh")); + assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("ssh.dev.azure.com")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("v3/CompanyName/ProjectName/RepoName")); + assert_eq!(parsed.print_scheme(), false); } #[test] fn https_user_azure_devops() { let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Https) - .user("organization") - .host("dev.azure.com") - .path("/organization/project/_git/repo") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!( + parsed.url(), + "https://organization@dev.azure.com/organization/project/_git/repo" + ); + assert_eq!(parsed.scheme(), Some("https")); + assert_eq!(parsed.user(), Some("organization")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("dev.azure.com")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/organization/project/_git/repo")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn ftp_user() { let test_url = "ftp://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Ftp) - .user("git") - .host("host.tld") - .path("/user/project-name.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "ftp://git@host.tld/user/project-name.git"); + assert_eq!(parsed.scheme(), Some("ftp")); + assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("host.tld")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), 
Some("/user/project-name.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn ftps_user() { let test_url = "ftps://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Ftps) - .user("git") - .host("host.tld") - .path("/user/project-name.git") - .print_scheme(true) - .build() - .unwrap(); - - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "ftps://git@host.tld/user/project-name.git"); + assert_eq!(parsed.scheme(), Some("ftps")); + assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("host.tld")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/user/project-name.git")); + assert_eq!(parsed.print_scheme(), true); } #[test] fn relative_unix_path() { let test_url = "../project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::File) - .path("../project-name.git") - .build() - .unwrap(); - assert_eq!(parsed, expected); + assert_eq!(parsed.url(), "../project-name.git"); + assert_eq!(parsed.scheme(), Some("file")); + assert_eq!(parsed.user(), None); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), None); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("../project-name.git")); + assert_eq!(parsed.print_scheme(), false); } #[test] fn absolute_unix_path() { let test_url = "/path/to/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::File) - .path(test_url) - .build() - .unwrap(); - assert_eq!(parsed, expected); + assert_eq!(parsed.url(), "/path/to/project-name.git"); + assert_eq!(parsed.scheme(), Some("file")); + assert_eq!(parsed.user(), None); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), None); + assert_eq!(parsed.port(), None); + 
assert_eq!(parsed.path(), Some("/path/to/project-name.git")); + assert_eq!(parsed.print_scheme(), false); } // Issue #6 - Relative Windows paths will parse into Unix paths @@ -221,13 +230,15 @@ fn absolute_unix_path() { fn relative_windows_path() { let test_url = r"..\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::File) - .path(test_url) - .build() - .unwrap(); - assert_eq!(parsed, expected); + assert_eq!(parsed.url(), r"..\project-name.git"); + assert_eq!(parsed.scheme(), Some("file")); + assert_eq!(parsed.user(), None); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), None); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("..\\project-name.git")); + assert_eq!(parsed.print_scheme(), false); } // Can I use `typed-path` to deal with this? @@ -237,59 +248,61 @@ fn relative_windows_path() { fn absolute_windows_path() { let test_url = r"c:\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::File) - .path(test_url) - .build() - .unwrap(); - assert_eq!(parsed, expected); + assert_eq!(parsed.url(), "ftps://git@host.tld/user/project-name.git"); + assert_eq!(parsed.scheme(), Some("ftp")); + assert_eq!(parsed.user(), Some("git")); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("host.tld")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/user/project-name.git")); + assert_eq!(parsed.print_scheme(), true); } -// Move test -//#[test] -//fn ssh_user_path_not_acctname_reponame_format() { -// let test_url = "git@test.com:repo"; -// let e = GitUrl::parse(test_url); +//// Move test +////#[test] +////fn ssh_user_path_not_acctname_reponame_format() { +//// let test_url = "git@test.com:repo"; +//// let e = GitUrl::parse(test_url); +//// +//// assert!(e.is_err()); +//// assert_eq!( +//// format!("{}", e.err().unwrap()), 
+//// "Git Url not in expected format" +//// ); +////} // -// assert!(e.is_err()); -// assert_eq!( -// format!("{}", e.err().unwrap()), -// "Git Url not in expected format" -// ); -//} - -// Move test -//#[test] -//fn ssh_without_organization() { -// let test_url = "ssh://f589726c3611:29418/repo"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// let expected = GitUrl { -// host: Some("f589726c3611".to_string()), -// //name: "repo".to_string(), -// //owner: Some("repo".to_string()), -// //organization: None, -// //fullname: "repo/repo".to_string(), -// scheme: Some(Scheme::Ssh), -// user: None, -// token: None, -// port: Some(29418), -// path: "repo".to_string(), -// //git_suffix: false, -// //scheme_prefix: true, -// print_scheme: true, -// }; +//// Move test +////#[test] +////fn ssh_without_organization() { +//// let test_url = "ssh://f589726c3611:29418/repo"; +//// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +//// let expected = GitUrl { +//// host: Some("f589726c3611".to_string()), +//// //name: "repo".to_string(), +//// //owner: Some("repo".to_string()), +//// //organization: None, +//// //fullname: "repo/repo".to_string(), +//// scheme: Some(Scheme::Ssh), +//// user: None, +//// token: None, +//// port: Some(29418), +//// path: "repo".to_string(), +//// //git_suffix: false, +//// //scheme_prefix: true, +//// print_scheme: true, +//// }; +//// +//// assert_eq!(parsed, expected); +////} // -// assert_eq!(parsed, expected); -//} - -//#[test] -//fn empty_path() { -// assert_eq!( -// GitUrlParseError::EmptyPath, -// GitUrl::parse("file://").unwrap_err() -// ) -//} +////#[test] +////fn empty_path() { +//// assert_eq!( +//// GitUrlParseError::EmptyPath, +//// GitUrl::parse("file://").unwrap_err() +//// ) +////} #[test] fn bad_port_number() { @@ -297,10 +310,10 @@ fn bad_port_number() { let e = GitUrl::parse(test_url); assert!(e.is_err()); - assert_eq!( - format!("{}", e.err().unwrap()), - "Error from Url crate: invalid 
port number" - ); + //assert_eq!( + // format!("{}", e.err().unwrap()), + // "Error from Url crate: invalid port number" + //); } // This test might not have a use anymore if we're not expanding "git:" -> "git://" @@ -308,12 +321,13 @@ fn bad_port_number() { fn git() { let test_url = "git://github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let expected = GitUrlBuilder::default() - .scheme(Scheme::Git) - .host("github.com") - .path("/owner/name.git") - .print_scheme(true) - .build() - .unwrap(); - assert_eq!(parsed, expected); + + assert_eq!(parsed.url(), "git://github.com/owner/name.git"); + assert_eq!(parsed.scheme(), Some("git")); + assert_eq!(parsed.user(), None); + assert_eq!(parsed.token(), None); + assert_eq!(parsed.host(), Some("github.com")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), Some("/owner/name.git")); + assert_eq!(parsed.print_scheme(), true); } From b7d6e0e8e29fb65947f66da1b69f06d92068d72a Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 5 Sep 2025 10:10:40 -0700 Subject: [PATCH 19/32] Everything connects again But the tests are broken --- examples/trim_auth.rs | 4 +- src/types/mod.rs | 128 ++++++++++++++------------- tests/provider.rs | 22 ++--- tests/trim_auth.rs | 198 +++++++++++++++++++++--------------------- 4 files changed, 182 insertions(+), 170 deletions(-) diff --git a/examples/trim_auth.rs b/examples/trim_auth.rs index a685035..7ddcde4 100644 --- a/examples/trim_auth.rs +++ b/examples/trim_auth.rs @@ -1,4 +1,4 @@ -use git_url_parse::{GitUrl, GitUrlParseError}; +use git_url_parse::{GitUrlOld, GitUrlParseError}; fn main() -> Result<(), GitUrlParseError> { env_logger::init(); @@ -25,7 +25,7 @@ fn main() -> Result<(), GitUrlParseError> { println!("Original: {}", test_url); println!( "Parsed + Trimmed: {}\n", - GitUrl::parse(test_url).unwrap().trim_auth() + GitUrlOld::parse(test_url).unwrap().trim_auth() ); } Ok(()) diff --git a/src/types/mod.rs b/src/types/mod.rs index 4579c2a..2b36eae 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -354,64 +354,69 @@ pub(crate) enum GitUrlParseHint { // } //} -///// Build the printable GitUrl from its components -//impl fmt::Display for GitUrlOld { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// let scheme = if let Some(scheme) = &self.scheme() -// && *self.print_scheme() -// { -// format!("{scheme}://") -// } else { -// String::new() -// }; -// -// let auth_info = match self.scheme() { -// Some(Scheme::Ssh) | Some(Scheme::Git) | Some(Scheme::GitSsh) => { -// if let Some(user) = &self.user() { -// format!("{user}@") -// } else { -// String::new() -// } -// } -// Some(Scheme::Http) | Some(Scheme::Https) => match (&self.user(), &self.token()) { -// (Some(user), Some(token)) => format!("{user}:{token}@"), -// (Some(user), None) => format!("{user}@",), -// (None, Some(token)) => format!("{token}@"), -// (None, None) => String::new(), -// }, -// _ => String::new(), -// }; -// -// let host = match &self.host() { -// 
Some(host) => host.to_string(), -// None => String::new(), -// }; -// -// let port = match &self.port() { -// Some(p) => format!(":{p}",), -// None => String::new(), -// }; -// -// let path = if self.scheme().clone() == Some(Scheme::Ssh) { -// if self.port().is_some() { -// if !self.path().as_str().starts_with('/') { -// format!("/{}", &self.path()) -// } else { -// self.path().to_string() -// } -// } else { -// format!(":{}", &self.path()) -// } -// } else { -// self.path().to_string() -// }; -// -// let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); -// -// write!(f, "{git_url_str}",) -// } -//} -// +// TODO: Revisit this +/// Build the printable GitUrl from its components +impl fmt::Display for GitUrl<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let scheme = if let Some(scheme) = &self.scheme() + && self.print_scheme() + { + format!("{scheme}://") + } else { + String::new() + }; + + let auth_info = match self.scheme() { + Some("ssh") | Some("git") | Some("git+ssh") => { + if let Some(user) = &self.user() { + format!("{user}@") + } else { + String::new() + } + } + Some("http") | Some("https") => match (&self.user(), &self.token()) { + (Some(user), Some(token)) => format!("{user}:{token}@"), + (Some(user), None) => format!("{user}@",), + (None, Some(token)) => format!("{token}@"), + (None, None) => String::new(), + }, + _ => String::new(), + }; + + let host = match &self.host() { + Some(host) => host.to_string(), + None => String::new(), + }; + + let port = match &self.port() { + Some(p) => format!(":{p}",), + None => String::new(), + }; + + let path = if let Some(path) = &self.path() { + if self.scheme().clone() == Some("ssh") { + if self.port().is_some() { + if !path.starts_with('/') { + format!("/{path}") + } else { + path.to_string() + } + } else { + format!(":{path}") + } + } else { + path.to_string() + } + } else { + String::new() + }; + + let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); + + write!(f, 
"{git_url_str}",) + } +} + //impl FromStr for GitUrlOld { // //type Err = GitUrlParseError; // type Err = GitUrlOldBuilderError; @@ -474,6 +479,13 @@ pub struct GitUrl<'a> } impl<'a> GitUrl<'a> { + pub fn provider_info(&self) -> Result + where + T: GitProvider, GitUrlParseError>, + { + T::from_git_url(self) + } + /// Returns `GitUrl` after removing `user` and `token` values /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info pub fn trim_auth(&self) -> GitUrl { diff --git a/tests/provider.rs b/tests/provider.rs index 2e45c68..8e07a08 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -39,7 +39,7 @@ fn ssh_generic_git() { fn custom_provider() { #[derive(Debug, Clone, PartialEq, Eq)] struct TestProvider; - impl GitProvider for TestProvider { + impl GitProvider, GitUrlParseError> for TestProvider { fn from_git_url(_url: &GitUrl) -> Result { Ok(Self) } @@ -165,13 +165,13 @@ fn ssh_gitlab_subgroups() { assert_eq!(provider_info, expected) } -#[test] -fn filepath() { - let test_url = "file:///home/user/Documents/"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - - assert!(parsed.provider().is_none()); - - let provider_info: Result = parsed.provider_info(); - assert!(provider_info.is_err()) -} +//#[test] +//fn filepath() { +// let test_url = "file:///home/user/Documents/"; +// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +// +// assert!(parsed.provider().is_none()); +// +// let provider_info: Result = parsed.provider_info(); +// assert!(provider_info.is_err()) +//} diff --git a/tests/trim_auth.rs b/tests/trim_auth.rs index 083dbba..d29be60 100644 --- a/tests/trim_auth.rs +++ b/tests/trim_auth.rs @@ -1,101 +1,101 @@ use git_url_parse::*; -#[test] -fn ssh_user_ports() { - let test_url = "ssh://git@host.tld:9999/user/project-name.git"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = 
"ssh://host.tld:9999/user/project-name.git"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -// Specific service support -#[test] -fn https_user_bitbucket() { - let test_url = "https://user@bitbucket.org/user/repo.git"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "https://bitbucket.org/user/repo.git"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -#[test] -fn ssh_user_bitbucket() { - let test_url = "git@bitbucket.org:user/repo.git"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "bitbucket.org:user/repo.git"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -#[test] -fn https_user_auth_bitbucket() { - let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git/"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "https://bitbucket.org/owner/name.git/"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -#[test] -fn https_user_github() { - let test_url = "https://user@github.com/user/repo.git/"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "https://github.com/user/repo.git/"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -#[test] -fn ssh_user_github() { - let test_url = "git@github.com:user/repo.git"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "github.com:user/repo.git"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -#[test] -fn https_user_auth_github() { - let test_url = "https://token:x-oauth-basic@github.com/owner/name.git/"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "https://github.com/owner/name.git/"; - - assert_eq!(format!("{}", 
parsed_and_trimmed), expected); -} - -#[test] -fn ssh_user_azure_devops() { - let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} - -#[test] -fn https_user_azure_devops() { - let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; - let parsed_and_trimmed = GitUrl::parse(test_url) - .expect("URL parse failed") - .trim_auth(); - let expected = "https://dev.azure.com/organization/project/_git/repo"; - - assert_eq!(format!("{}", parsed_and_trimmed), expected); -} +//#[test] +//fn ssh_user_ports() { +// let test_url = "ssh://git@host.tld:9999/user/project-name.git"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "ssh://host.tld:9999/user/project-name.git"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//// Specific service support +//#[test] +//fn https_user_bitbucket() { +// let test_url = "https://user@bitbucket.org/user/repo.git"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "https://bitbucket.org/user/repo.git"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn ssh_user_bitbucket() { +// let test_url = "git@bitbucket.org:user/repo.git"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "bitbucket.org:user/repo.git"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn https_user_auth_bitbucket() { +// let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git/"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") 
+// .trim_auth(); +// let expected = "https://bitbucket.org/owner/name.git/"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn https_user_github() { +// let test_url = "https://user@github.com/user/repo.git/"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "https://github.com/user/repo.git/"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn ssh_user_github() { +// let test_url = "git@github.com:user/repo.git"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "github.com:user/repo.git"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn https_user_auth_github() { +// let test_url = "https://token:x-oauth-basic@github.com/owner/name.git/"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "https://github.com/owner/name.git/"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn ssh_user_azure_devops() { +// let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} +// +//#[test] +//fn https_user_azure_devops() { +// let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; +// let parsed_and_trimmed = GitUrl::parse(test_url) +// .expect("URL parse failed") +// .trim_auth(); +// let expected = "https://dev.azure.com/organization/project/_git/repo"; +// +// assert_eq!(format!("{}", parsed_and_trimmed), expected); +//} From a95e34fdfd8ce4748334c9e7ea35857af4c2c321 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 5 Sep 2025 19:58:03 -0700 Subject: [PATCH 20/32] Parse tests passing --- Cargo.toml | 2 +- examples/multi.rs | 7 +- examples/nom.rs | 353 -------------------- examples/trim_auth.rs | 4 +- src/types/mod.rs | 737 +++++++++++++++--------------------------- tests/parse.rs | 65 ++-- 6 files changed, 290 insertions(+), 878 deletions(-) delete mode 100644 examples/nom.rs diff --git a/Cargo.toml b/Cargo.toml index c5c944d..89f7faa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ tracing = ["dep:tracing"] [dependencies] tracing = { version = "0.1", optional = true } #url = { version = "2.2" } -strum = { version = "0.27", features = ["derive"] } +#strum = { version = "0.27", features = ["derive"] } thiserror = "2" nom = "8" diff --git a/examples/multi.rs b/examples/multi.rs index 7fde7bf..ce59157 100644 --- a/examples/multi.rs +++ b/examples/multi.rs @@ -4,8 +4,10 @@ fn main() -> Result<(), GitUrlParseError> { env_logger::init(); let test_vec = vec![ - "https://github.com/tjtelan/orbitalci.git", - "git@github.com:tjtelan/orbitalci.git", + "https://github.com/tjtelan/git-url-parse-rs.git", + "git@github.com:tjtelan/git-url-parse-rs.git", + "git@hostname:22/path/to/repo.git", + "ssh://git@github.com:22/asdf/asdf.git", "https://token:x-oauth-basic@host.xz/path/to/repo.git/", "https://x-token-auth:token@host.xz/path/to/repo.git/", "git+ssh://git@some-host.com/and-the-path/name", @@ -15,6 +17,7 @@ fn main() -> Result<(), GitUrlParseError> { "~/path/to/repo.git/", "./path/to/repo.git/", "./path/to/repo.git", + "/path/to/repo.git", "../test_repo", "..\\test_repo", "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", diff --git a/examples/nom.rs b/examples/nom.rs deleted file mode 100644 index 51e312e..0000000 --- a/examples/nom.rs +++ /dev/null @@ -1,353 +0,0 @@ -use getset::{Getters, Setters}; -use git_url_parse::{GitUrl, GitUrlParseError}; -use nom::FindSubstring; -use nom::bytes::complete::{is_a, take_while}; -use 
nom::character::complete::{digit1, one_of}; -use nom::combinator::{opt, peek, verify}; -use nom::error::context; -use nom::multi::{many0, many1}; -use nom::sequence::{preceded, terminated}; -use nom::{ - IResult, Parser, - branch::alt, - bytes::complete::tag, - character::complete::alpha1, - combinator::recognize, - sequence::{pair, separated_pair}, -}; - -#[derive(Debug, Getters, Setters, Default)] -struct GitUrl2<'a> { - url: String, - scheme: Option<&'a str>, - user: Option<&'a str>, - token: Option<&'a str>, - host: Option<&'a str>, - port: Option<&'a str>, - path: Option<&'a str>, -} - -impl<'a> GitUrl2<'a> { - // https://datatracker.ietf.org/doc/html/rfc3986 - // Based on rfc3986, but does not strictly cover the spec - // * No support for: - // * query, fragment, percent-encoding, and much of the edges for path support - // * many forms of ip representations like ipv6, hexdigits - // * Added support for: - // * parsing ssh git urls which use ":" as a delimiter between the authority and path - // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) - // * some limited support for windows/linux filepaths - pub fn parse(input: &'a str) -> IResult<&'a str, Self> { - let original = input; - - let (input, scheme) = Self::parse_scheme.parse(input)?; - let (input, heir_part) = Self::parse_hier_part(input)?; - - let (user_opt, token_opt) = heir_part.0.0; - let (host_opt) = heir_part.0.1; - let (port_opt) = heir_part.0.2; - let (path_opt) = heir_part.1; - - Ok(( - input, - GitUrl2 { - url: original.to_string(), - scheme, - user: user_opt, - token: token_opt, - host: host_opt, - port: port_opt, - path: path_opt, - }, - )) - } - - pub fn parse_scheme(input: &'a str) -> IResult<&'a str, Option<&'a str>> { - let mut check = context( - "scheme validate", - peek(pair( - pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() - || c.is_ascii_digit() - || c == '+' - || c == '-' - || c == '.' 
- }), - ), - tag::<&str, &str, nom::error::Error<&str>>("://"), - )), - ); - - if check.parse(input).is_err() { - return Ok((input, None)); - } - - // Must start with alpha character, then alpha/digit/+/-/. - context( - "Scheme parse", - opt(terminated( - recognize(pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() - || c.is_ascii_digit() - || c == '+' - || c == '-' - || c == '.' - }), - )), - // Not part of spec. We consume the "://" here to more easily manage scheme to be optional - tag("://"), - )), - ) - .parse(input) - } - - // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 - // The rfc says parsing the "//" part of the uri belongs to the hier-part parsing - // but we only support common internet protocols, file paths, but not other "baseless" ones - // so it is sensible for this move it with scheme parsing to support git user service urls - pub fn parse_hier_part( - input: &'a str, - ) -> IResult< - &'a str, - ( - ((Option<&str>, Option<&str>), Option<&str>, Option<&str>), - Option<&'a str>, - ), - > { - let (input, authority) = Self::parse_authority(input)?; - //println!("authority: {authority:?}"); - - let (input, part) = context( - "Top of path parsers", - alt(( - //preceded(tag("//"), Self::path_abempty_parser()), - Self::path_abempty_parser(), - Self::path_rootless_parser(), - Self::path_ssh_parser(), - )), - ) - .parse(input)?; - - Ok((input, (authority, Some(part)))) - } - - pub fn parse_authority( - input: &'a str, - ) -> IResult<&'a str, ((Option<&str>, Option<&str>), Option<&str>, Option<&str>)> { - let original = input; - - // Optional: username / token - let (input, userinfo) = Self::parse_userinfo(input)?; - - // Host - let (input, host) = context( - "Host parser", - opt(verify( - recognize(take_while(|c: char| reg_name_uri_chars(c))), - |s: &str| { - let has_alphanum = s.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); - let starts_with_alphanum = s.chars().next().is_some_and(|c| is_alphanum(c)); - - 
has_alphanum && starts_with_alphanum - }, - )), - ) - .parse(input)?; - - // Optional: port - let (input, port) = Self::parse_port(input)?; - - Ok((input, (userinfo, host, port))) - } - - pub fn parse_userinfo( - authority_input: &'a str, - ) -> IResult<&'a str, (Option<&'a str>, Option<&'a str>)> { - // Peek for username@ - let mut check = context( - "Userinfo validation", - peek(pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), - tag::<&str, &str, nom::error::Error<&str>>("@"), - )), - ); - - if check.parse(authority_input).is_err() { - return Ok((authority_input, (None, None))); - } - - // Userinfo - let (authority_input, userinfo) = context( - "Userinfo parser", - opt(recognize(take_while(|c: char| { - unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' - }))), - ) - .parse(authority_input)?; - - let (authority_input, _) = if userinfo.is_some() { - context("Userinfo '@' parser", tag("@")).parse(authority_input)? - } else { - // No change to input, but let the compiler be happy - (authority_input, authority_input) - }; - - // Break down userinfo into user and token - let (user, token) = if let Some(userinfo) = userinfo { - if userinfo.contains(":") { - let (_, (user, token)) = context( - "Userinfo with colon parser", - separated_pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - tag(":"), - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - ), - ) - .parse(userinfo)?; - (Some(user), Some(token)) - } else { - (Some(userinfo), None) - } - } else { - (None, None) - }; - - Ok((authority_input, (user, token))) - } - - pub fn parse_port(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { - context("Port parser", opt(preceded(tag(":"), digit1))).parse(authority_input) - } - - // This will get absolute paths. 
- // todo: test for empty and start with "//" - pub fn path_abempty_parser( - ) -> impl Parser< - &'a str, - Output = > as Parser< - &'a str, - >>::Output, - Error = nom::error::Error<&'a str>, - >{ - // Starts with '/' or empty - context( - "Path parser (abempty)", - recognize(many1(pair( - tag("/"), - take_while(|c: char| pchar_uri_chars(c)), - ))), - ) - } - - pub fn path_ssh_parser( - ) -> impl Parser< - &'a str, - Output = > as Parser< - &'a str, - >>::Output, - Error = nom::error::Error<&'a str>, - >{ - context( - "Path parser (ssh)", - recognize(( - tag(":"), - take_while(|c: char| pchar_uri_chars(c)), - many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), - )), - ) - } - - pub fn path_rootless_parser( - ) -> impl Parser< - &'a str, - Output = > as Parser< - &'a str, - >>::Output, - Error = nom::error::Error<&'a str>, - >{ - context( - "Path parser (rootless)", - recognize(pair( - take_while(|c: char| pchar_uri_chars(c)), - many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), - )), - ) - } -} - -fn pchar_uri_chars(c: char) -> bool { - // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" - unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' -} - -fn reg_name_uri_chars(c: char) -> bool { - // *( unreserved / pct-encoded (not implemented) / sub-delims ) - unreserved_uri_chars(c) || subdelims_uri_chars(c) -} -fn unreserved_uri_chars(c: char) -> bool { - is_alphanum(c) || c == '-' || c == '.' || c == '_' || c == '~' -} - -fn is_alphanum(c: char) -> bool { - c.is_ascii_alphabetic() || c.is_ascii_digit() -} - -fn subdelims_uri_chars(c: char) -> bool { - c == '!' 
- || c == '$' - || c == '&' - || c == '\'' - || c == '(' - || c == ')' - || c == '*' - || c == '+' - || c == ',' - || c == ';' - || c == '=' - || c == '\\' // This is not part of spec, but used for windows paths -} - -fn main() -> Result<(), GitUrlParseError> { - env_logger::init(); - - let test_vec = vec![ - "https://github.com/tjtelan/git-url-parse-rs.git", - "git@github.com:tjtelan/git-url-parse-rs.git", - "git@hostname:22/path/to/repo.git", - "ssh://git@github.com:22/asdf/asdf.git", - "https://token:x-oauth-basic@host.xz/path/to/repo.git/", - "https://x-token-auth:token@host.xz/path/to/repo.git/", - "git+ssh://git@some-host.com/and-the-path/name", - "git://some-host.com/and-the-path/name", - "host.tld:user/project-name.git", - "file:///path/to/repo.git/", - "~/path/to/repo.git/", - "./path/to/repo.git/", - "./path/to/repo.git", - "/path/to/repo.git", - "../test_repo", - "..\\test_repo", - "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName", - "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName", - ]; - - for test_url in test_vec { - //let parsed = GitUrl::parse(test_url).unwrap(); - ////println!("leftover:{leftover:#?}, output:{output:#?}"); - ////let parsed = GitUrl::parse(test_url)?; - ////println!("Original: {}", test_url); - //println!("Parsed: {}", parsed); - //println!("Parsed: {:#?}", parsed); - ////println!("{:?}\n", parsed); - - let parsed = GitUrl2::parse(test_url).unwrap(); - println!("{parsed:#?}"); - //println!("{:?}", parsed.parse()); - println!(""); - } - Ok(()) -} diff --git a/examples/trim_auth.rs b/examples/trim_auth.rs index 7ddcde4..a685035 100644 --- a/examples/trim_auth.rs +++ b/examples/trim_auth.rs @@ -1,4 +1,4 @@ -use git_url_parse::{GitUrlOld, GitUrlParseError}; +use git_url_parse::{GitUrl, GitUrlParseError}; fn main() -> Result<(), GitUrlParseError> { env_logger::init(); @@ -25,7 +25,7 @@ fn main() -> Result<(), GitUrlParseError> { println!("Original: {}", test_url); println!( "Parsed + Trimmed: {}\n", - 
GitUrlOld::parse(test_url).unwrap().trim_auth() + GitUrl::parse(test_url).unwrap().trim_auth() ); } Ok(()) diff --git a/src/types/mod.rs b/src/types/mod.rs index 2b36eae..f1c7efc 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -6,54 +6,23 @@ pub use provider::{AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProv use core::str; use std::fmt; -use std::str::FromStr; -use derive_builder::Builder; -use getset::{CloneGetters, CopyGetters, Getters, Setters}; +use getset::{CloneGetters, CopyGetters, Setters}; use nom::Finish; use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_until, take_while}; -use nom::character::complete::one_of; +use nom::bytes::complete::{tag, take_while}; use nom::character::complete::{alpha1, digit1}; -use nom::combinator::{peek, recognize, verify}; +use nom::combinator::{map_opt, peek, recognize, verify}; use nom::error::context; use nom::multi::{many0, many1}; use nom::sequence::{pair, preceded, separated_pair, terminated}; -use nom::{IResult, Parser, combinator::opt, combinator::rest}; +use nom::{IResult, Parser, combinator::opt}; -use strum::{Display, EnumString, VariantNames}; #[cfg(feature = "tracing")] use tracing::debug; use typed_path::{Utf8TypedPath, Utf8TypedPathBuf}; -//// todo: let's get rid of this -///// Supported uri schemes for parsing -//#[derive(Debug, PartialEq, Eq, EnumString, VariantNames, Clone, Display)] -//#[strum(serialize_all = "kebab_case")] -//pub(crate) enum Scheme { -// /// Represents `file://` url scheme -// File, -// /// Represents `ftp://` url scheme -// Ftp, -// /// Represents `ftps://` url scheme -// Ftps, -// /// Represents `git://` url scheme -// Git, -// /// Represents `git+ssh://` url scheme -// #[strum(serialize = "git+ssh")] -// GitSsh, -// /// Represents `http://` url scheme -// Http, -// /// Represents `https://` url scheme -// Https, -// /// Represents `ssh://` url scheme -// Ssh, -// ///// Represents No url scheme -// //Unspecified, -// Other(String), // todo: need 
test for this -//} - -#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub(crate) enum GitUrlParseHint { #[default] Unknown, @@ -62,325 +31,52 @@ pub(crate) enum GitUrlParseHint { Httplike, } -///// GitUrl represents an input url that is a url used by git -///// Internally during parsing the url is sanitized and uses the `url` crate to perform -///// the majority of the parsing effort, and with some extra handling to expose -///// metadata used my many git hosting services -//#[derive(Debug, PartialEq, Eq, Clone, Builder, Getters, Setters)] -//#[builder(build_fn(validate = "Self::prebuild_check"), field(public))] -//#[get = "pub"] -//pub struct GitUrlOld

-//where -// P: GitProvider, -//{ -// /// The host, domain or IP of the repo -// #[builder(setter(into, strip_option), default)] -// host: Option, -// /// The url scheme -// #[builder(setter(into, strip_option), default)] -// scheme: Option, -// /// Authentication user -// #[builder(setter(into, strip_option), default)] -// #[getset(set = "pub(crate)")] -// user: Option, -// /// Authentication token (could appear in the https urls) -// #[builder(setter(into, strip_option), default)] -// #[getset(set = "pub(crate)")] -// token: Option, -// /// The port where git service is hosted -// #[builder(setter(into, strip_option), default)] -// port: Option, -// /// The path to repo w/ respect to user + hostname -// #[builder(setter(into))] -// path: Utf8TypedPathBuf, -// /// Include scheme:// when printing url -// #[builder(default)] -// print_scheme: bool, -// /// Hosted git provider info derived from GitUrl -// #[builder(setter(into, strip_option), default)] -// provider: Option

, -//} - -//impl> GitUrlOldBuilder

{ -// pub fn trim_auth(&mut self) { -// self.user = None; -// self.token = None; -// } -// -// fn prebuild_check(&self) -> Result<(), String> { -// //#[cfg(feature = "tracing")] -// //debug!("Processing: {:?}", &url); -// -// // Error if there are null bytes within the url -// -// // https://github.com/tjtelan/git-url-parse-rs/issues/16 -// if let Some(Some(host)) = &self.host { -// if host.contains('\0') { -// return Err(GitUrlParseError::FoundNullBytes.to_string()); -// } -// -// if host.is_empty() { -// return Err( -// GitUrlParseError::UnexpectedEmptyValue(String::from("host")).to_string() -// ); -// } -// } -// -// if let Some(Some(user)) = &self.user { -// if user.contains('\0') { -// return Err(GitUrlParseError::FoundNullBytes.to_string()); -// } -// -// if user.is_empty() { -// return Err( -// GitUrlParseError::UnexpectedEmptyValue(String::from("user")).to_string() -// ); -// } -// } -// -// if let Some(Some(token)) = &self.token { -// if token.contains('\0') { -// return Err(GitUrlParseError::FoundNullBytes.to_string()); -// } -// -// if token.is_empty() { -// return Err( -// GitUrlParseError::UnexpectedEmptyValue(String::from("token")).to_string(), -// ); -// } -// } -// -// if let Some(path) = &self.path { -// if path.as_str().contains('\0') { -// return Err(GitUrlParseError::FoundNullBytes.to_string()); -// } -// if path.as_str().is_empty() { -// return Err( -// GitUrlParseError::UnexpectedEmptyValue(String::from("path")).to_string() -// ); -// } -// } -// -// Ok(()) -// } -// -// fn parse(url: &str) -> Result { -// debug!("{url}"); -// let mut giturl = GitUrlOldBuilder::default(); -// let mut working_url = url; -// let mut hint = GitUrlParseHint::default(); -// -// giturl.parse_scheme(&mut working_url, &mut hint); -// giturl.parse_auth_info(&mut working_url, &mut hint); -// let save_state = working_url; -// -// giturl.parse_host_port(&mut working_url, &mut hint); -// -// match hint { -// GitUrlParseHint::Httplike => { -// if 
working_url.starts_with(":") && giturl.port.is_none() { -// return Err(GitUrlParseError::UnexpectedFormat); -// } else { -// println!("Nothing wrong here: {working_url}"); -// } -// } -// GitUrlParseHint::Sshlike => { -// giturl.parse_ssh_path(&mut working_url, &mut hint); -// } -// GitUrlParseHint::Filelike | GitUrlParseHint::Unknown => { -// working_url = save_state; -// giturl.host = None; -// giturl.scheme(Scheme::File); -// } -// } -// -// giturl.parse_path(&mut working_url, &mut hint); -// -// Ok(giturl) -// } -// -// fn parse_scheme(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { -// let mut builder = self.clone(); -// -// if let Ok((leftover, Some(s))) = GitUrlOldBuilder::

::_parse_scheme(working_url) { -// println!("leftover: {leftover}, scheme: {s:?}"); -// -// let scheme = Scheme::from_str(s).expect("Unknown scheme"); -// -// *hint = match &scheme { -// Scheme::Ssh => GitUrlParseHint::Sshlike, -// Scheme::File => GitUrlParseHint::Filelike, -// _ => GitUrlParseHint::Httplike, -// }; -// -// builder.scheme(scheme); -// builder.print_scheme(true); -// -// *self = builder; -// *working_url = leftover; -// } -// } -// -// fn parse_auth_info(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { -// let mut builder = self.clone(); -// if let Ok((leftover, Some(username))) = GitUrlOldBuilder::

::_parse_username(working_url) -// { -// println!("leftover: {leftover}, username: {username:?}"); -// builder.user(username); -// -// if *hint == GitUrlParseHint::Unknown { -// *hint = GitUrlParseHint::Sshlike; -// } -// -// if let Ok((token, Some(real_username))) = GitUrlOldBuilder::

::_parse_token(username) -// { -// println!("token: {token}, real_username: {real_username:?}"); -// builder.user(real_username); -// builder.token(token); -// -// if *hint == GitUrlParseHint::Unknown || *hint == GitUrlParseHint::Sshlike { -// *hint = GitUrlParseHint::Httplike; -// } -// } -// -// *working_url = leftover; -// *self = builder; -// } -// } -// -// fn parse_host_port(&mut self, working_url: &mut &str, hint: &mut GitUrlParseHint) { -// let mut builder = self.clone(); -// let mut save = working_url.clone(); -// -// if let Ok((leftover, Some(hostname))) = GitUrlOldBuilder::

::_parse_hostname(save) { -// println!("leftover {leftover}, hostname: {hostname}"); -// builder.host(hostname); -// save = leftover; -// } -// -// if let Ok((leftover, Some(port))) = GitUrlOldBuilder::

::_parse_port(save) { -// if !port.is_empty() { -// println!("leftover {leftover}, port: {port}"); -// builder.port(u16::from_str(port).expect("Not a valid port")); -// save = leftover; -// -// // If we're currently uncertain, but we've found a port -// // our guess is this more likely is an http url than an ssh url -// // Add the `ssh://` scheme to the url if this is incorrect -// if *hint == GitUrlParseHint::Unknown { -// *hint = GitUrlParseHint::Httplike; -// } -// } -// } -// -// // https://mslinn.com/git/040-git-urls.html - we only support relative paths when we have ports -// if builder.port.is_none() && save.starts_with(":") { -// *hint = GitUrlParseHint::Sshlike; -// } -// -// *self = builder; -// *working_url = save; -// } -// -// fn parse_ssh_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { -// let mut builder = self.clone(); -// -// if let Ok((_leftover, Some(path))) = GitUrlOldBuilder::

::_parse_ssh_path(working_url) { -// builder.scheme(Scheme::Ssh); -// -// *self = builder; -// *working_url = path; -// } -// } -// -// fn parse_path(&mut self, working_url: &mut &str, _hint: &mut GitUrlParseHint) { -// let mut builder = self.clone(); -// if let Ok((leftover, path)) = GitUrlOldBuilder::

::_parse_path(working_url) { -// println!("leftover {leftover}, path: {path}"); -// -// let parsed_path = Utf8TypedPath::derive(path).to_path_buf(); -// builder.path(parsed_path); -// -// *self = builder; -// *working_url = leftover; -// } -// } -// -// //// -// -// fn _parse_scheme(input: &str) -> IResult<&str, Option<&str>> { -// opt(terminated( -// alt(( -// // Fancy: Can I build an iter map on this? -// tag(Scheme::File.to_string().as_bytes()), -// tag(Scheme::Ftps.to_string().as_bytes()), -// tag(Scheme::Ftp.to_string().as_bytes()), -// tag(Scheme::GitSsh.to_string().as_bytes()), -// tag(Scheme::Git.to_string().as_bytes()), -// tag(Scheme::Https.to_string().as_bytes()), -// tag(Scheme::Http.to_string().as_bytes()), -// tag(Scheme::Ssh.to_string().as_bytes()), -// // todo: Other(), needs a test -// )), -// tag("://"), -// )) -// .parse(input) -// } -// -// fn _parse_username(input: &str) -> IResult<&str, Option<&str>> { -// opt(terminated(take_until("@"), tag("@"))).parse(input) -// } -// -// fn _parse_token(input: &str) -> IResult<&str, Option<&str>> { -// opt(terminated(take_until(":"), tag(":"))).parse(input) -// } -// -// fn _parse_hostname(input: &str) -> IResult<&str, Option<&str>> { -// opt(take_till(|c| c == '/' || c == ':')).parse(input) -// } -// -// fn _parse_port(input: &str) -> IResult<&str, Option<&str>> { -// opt(preceded(tag(":"), take_while(|c: char| c.is_ascii_digit()))).parse(input) -// } -// -// // This is making an assumption that the path is relative, not absolute -// // This is bc we do not support absolute paths when we also have a port -// fn _parse_ssh_path(input: &str) -> IResult<&str, Option<&str>> { -// opt(preceded(one_of("/:"), rest)).parse(input) -// } -// -// fn _parse_path(input: &str) -> IResult<&str, &str> { -// rest(input) -// } -//} - -// TODO: Revisit this +#[derive(Clone, CopyGetters, CloneGetters, Setters, Default, PartialEq, Eq)] +pub struct GitUrl<'url> { + #[getset(get_copy = "pub", set = "pub(crate)")] + scheme: 
Option<&'url str>, + #[getset(get_copy = "pub", set = "pub(crate)")] + user: Option<&'url str>, + #[getset(get_copy = "pub", set = "pub(crate)")] + token: Option<&'url str>, + #[getset(get_copy = "pub")] + host: Option<&'url str>, + #[getset(get_copy = "pub")] + port: Option, + #[getset(get_copy = "pub")] + path: Option<&'url str>, + + //#[getset(skip)] + //url: String, + /// Include scheme:// when printing url + #[getset(get_copy = "pub")] + print_scheme: bool, + + #[getset(get_copy = "pub(crate)")] + hint: GitUrlParseHint, + ///// Hosted git provider info derived from GitUrl + //#[getset(skip)] + //provider: Option

, +} + /// Build the printable GitUrl from its components impl fmt::Display for GitUrl<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let scheme = if let Some(scheme) = &self.scheme() - && self.print_scheme() - { - format!("{scheme}://") + let scheme = if self.print_scheme() { + if let Some(scheme) = self.scheme() { + format!("{scheme}://") + } else { + String::new() + } } else { String::new() }; - let auth_info = match self.scheme() { - Some("ssh") | Some("git") | Some("git+ssh") => { - if let Some(user) = &self.user() { - format!("{user}@") - } else { - String::new() - } - } - Some("http") | Some("https") => match (&self.user(), &self.token()) { - (Some(user), Some(token)) => format!("{user}:{token}@"), - (Some(user), None) => format!("{user}@",), - (None, Some(token)) => format!("{token}@"), - (None, None) => String::new(), - }, - _ => String::new(), + let auth_info = match (self.user(), self.token()) { + (Some(user), Some(token)) => format!("{user}:{token}@"), + (Some(user), None) => format!("{user}@",), + (None, Some(token)) => format!("{token}@"), + (None, None) => String::new(), }; let host = match &self.host() { @@ -388,27 +84,17 @@ impl fmt::Display for GitUrl<'_> { None => String::new(), }; - let port = match &self.port() { - Some(p) => format!(":{p}",), - None => String::new(), - }; - - let path = if let Some(path) = &self.path() { - if self.scheme().clone() == Some("ssh") { - if self.port().is_some() { - if !path.starts_with('/') { - format!("/{path}") - } else { - path.to_string() - } - } else { - format!(":{path}") - } - } else { - path.to_string() + let (port, path) = match (self.hint(), self.port(), self.path()) { + (GitUrlParseHint::Httplike, Some(port), Some(path)) => { + (format!(":{port}"), format!("/{path}")) } - } else { - String::new() + (GitUrlParseHint::Httplike, None, Some(path)) => (format!(""), format!("{path}")), + (GitUrlParseHint::Sshlike, Some(port), Some(path)) => { + (format!(":{port}"), format!("/{path}")) + } + 
(GitUrlParseHint::Sshlike, None, Some(path)) => (format!(""), format!(":{path}")), + (GitUrlParseHint::Filelike, None, Some(path)) => (format!(""), format!("{path}")), + _ => (format!(""), format!("")), }; let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); @@ -417,75 +103,46 @@ impl fmt::Display for GitUrl<'_> { } } -//impl FromStr for GitUrlOld { -// //type Err = GitUrlParseError; -// type Err = GitUrlOldBuilderError; -// -// fn from_str(s: &str) -> Result { -// GitUrlOld::parse(s) -// } -//} -// -//impl GitUrlOld { -// /// Returns `GitUrl` after removing `user` and `token` values -// /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info -// pub fn trim_auth(&self) -> GitUrlOld { -// let mut new_giturl = self.clone(); -// new_giturl.set_user(None); -// new_giturl.set_token(None); -// new_giturl -// } -// -// /// Returns a `Result` after normalizing and parsing `url` for metadata -// pub fn parse(url: &str) -> Result { -// let giturl = GitUrlOldBuilder::parse(url).unwrap(); -// giturl.build() -// } -// -// pub fn provider_info(&self) -> Result -// where -// T: GitProvider, -// { -// T::from_git_url(self) -// } -//} - -#[derive(Clone, Debug, CopyGetters, CloneGetters, Setters, Default, PartialEq, Eq)] -//pub struct GitUrl<'a, P = GenericProvider> -pub struct GitUrl<'a> -//where -// P: GitProvider, GitUrlParseError>, -{ - #[getset(get_clone = "pub", set = "pub(crate)")] - url: String, - #[getset(get_copy = "pub", set = "pub(crate)")] - scheme: Option<&'a str>, - #[getset(get_copy = "pub", set = "pub(crate)")] - user: Option<&'a str>, - #[getset(get_copy = "pub", set = "pub(crate)")] - token: Option<&'a str>, - #[getset(get_copy = "pub")] - host: Option<&'a str>, - #[getset(get_copy = "pub")] - port: Option<&'a str>, - #[getset(get_copy = "pub")] - path: Option<&'a str>, - /// Include scheme:// when printing url - #[getset(get_copy = "pub")] - print_scheme: bool, - ///// Hosted git provider info derived from 
GitUrl - //#[getset(skip)] - //provider: Option

, -} +// This is to hide `url` from debug output +impl fmt::Debug for GitUrl<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + #[derive(Debug)] + struct GitUrl<'a> { + scheme: Option<&'a str>, + user: Option<&'a str>, + token: Option<&'a str>, + host: Option<&'a str>, + port: Option, + path: Option<&'a str>, + } -impl<'a> GitUrl<'a> { - pub fn provider_info(&self) -> Result - where - T: GitProvider, GitUrlParseError>, - { - T::from_git_url(self) + let Self { + //url: _, + scheme, + user, + token, + host, + port, + path, + print_scheme: _, + hint: _, + } = self; + + fmt::Debug::fmt( + &GitUrl { + scheme: *scheme, + user: *user, + token: *token, + host: *host, + port: *port, + path: *path, + }, + f, + ) } +} +impl<'url> GitUrl<'url> { /// Returns `GitUrl` after removing `user` and `token` values /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info pub fn trim_auth(&self) -> GitUrl { @@ -494,6 +151,7 @@ impl<'a> GitUrl<'a> { new_giturl.set_token(None); new_giturl } + // https://datatracker.ietf.org/doc/html/rfc3986 // Based on rfc3986, but does not strictly cover the spec // * No support for: @@ -503,48 +161,148 @@ impl<'a> GitUrl<'a> { // * parsing ssh git urls which use ":" as a delimiter between the authority and path // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) // * some limited support for windows/linux filepaths - pub fn parse(input: &'a str) -> Result { + pub fn parse(input: &'url str) -> Result { // Error if there are null bytes within the url // https://github.com/tjtelan/git-url-parse-rs/issues/16 if input.contains('\0') { return Err(GitUrlParseError::FoundNullBytes); } - let original = input; + //let original = input.to_string(); - let (input, scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); + let (input, mut scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); let (_input, heir_part) = 
Self::parse_hier_part(input).finish().unwrap_or_default(); let (user_opt, token_opt) = heir_part.0.0; let host_opt = heir_part.0.1; let port_opt = heir_part.0.2; - let path_opt = heir_part.1; + let mut path_opt = heir_part.1; + + // We will respect whether scheme was initially set + let print_scheme = scheme.is_some(); - // This needs another pass - let provider = if let Some(scheme) = scheme { - if scheme == "http" || scheme == "https" || scheme == "ssh" { - Some(GenericProvider::default()) + // Take a moment to identify the type of url we have + // We use the GitUrlParseHint to validate or adjust formatting path, if necessary + let hint = if let Some(scheme) = scheme { + if scheme.contains("ssh") { + GitUrlParseHint::Sshlike } else { - None + match scheme.to_lowercase().as_str() { + "file" => GitUrlParseHint::Filelike, + _ => GitUrlParseHint::Httplike, + } } } else { - None + if user_opt.is_none() + && token_opt.is_none() + && host_opt.is_none() + && port_opt.is_none() + && path_opt.is_some() + { + // if we only have a path => file + GitUrlParseHint::Filelike + } else if user_opt.is_some() && token_opt.is_some() { + // If we have a user and token => http + GitUrlParseHint::Httplike + } else if let Some(path) = path_opt { + // If path starts with a colon => ssh + if path.starts_with(':') { + GitUrlParseHint::Sshlike + } else { + GitUrlParseHint::Unknown + } + } else { + GitUrlParseHint::Unknown + } }; - Ok(GitUrl { - url: original.to_string(), + // If we found an ssh url, we should adjust the path. 
+ // Skip the first character + if hint == GitUrlParseHint::Sshlike { + if let Some(scheme) = scheme.as_mut() { + *scheme = "ssh"; + } else { + scheme = Some("ssh") + } + if let Some(path) = path_opt.as_mut() { + *path = &path[1..]; + } + } + + if hint == GitUrlParseHint::Filelike { + if let Some(scheme) = scheme.as_mut() { + *scheme = "file"; + } else { + scheme = Some("file") + } + } + + let git_url = GitUrl { scheme, user: user_opt, token: token_opt, host: host_opt, port: port_opt, path: path_opt, - print_scheme: scheme.is_some(), - //provider - }) + //url: original, + print_scheme, + hint, + }; + + if git_url.is_valid() { + Ok(git_url) + } else { + Err(GitUrlParseError::UnexpectedFormat) + } + } + + pub fn provider_info(&self) -> Result + where + T: GitProvider, GitUrlParseError>, + { + T::from_git_url(self) + } + + fn is_valid(&self) -> bool { + // Last chance validation + + // There's an edge case we don't cover: ssh urls using ports + absolute paths + // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it + + // only ssh paths start with ':' + if self.hint() != GitUrlParseHint::Sshlike { + if let Some(path) = &self.path { + if path.starts_with(':') { + return false; + } + } + } + + // if we are not httplike, we shouldn't have tokens + if self.hint() != GitUrlParseHint::Httplike { + if self.token().is_some() { + return false; + } + } + + // if we are filelike, we should only have paths + if self.hint() == GitUrlParseHint::Filelike { + if (self.user().is_some() + || self.token().is_some() + || self.host().is_some() + || self.port().is_some() + || self.path().is_none()) + { + return false; + } + } + + // Anything not None should not be empty + + true } - pub fn parse_scheme(input: &'a str) -> IResult<&'a str, Option<&'a str>> { + fn parse_scheme(input: &'url str) -> IResult<&'url str, Option<&'url str>> { let mut check = context( "scheme validate", peek(pair( @@ -591,13 +349,13 @@ impl<'a> GitUrl<'a> { // The rfc says 
parsing the "//" part of the uri belongs to the hier-part parsing // but we only support common internet protocols, file paths, but not other "baseless" ones // so it is sensible for this move it with scheme parsing to support git user service urls - pub fn parse_hier_part( - input: &'a str, + fn parse_hier_part( + input: &'url str, ) -> IResult< - &'a str, + &'url str, ( - ((Option<&str>, Option<&str>), Option<&str>, Option<&str>), - Option<&'a str>, + ((Option<&str>, Option<&str>), Option<&str>, Option), + Option<&'url str>, ), > { let (input, authority) = Self::parse_authority(input)?; @@ -617,15 +375,28 @@ impl<'a> GitUrl<'a> { Ok((input, (authority, Some(part)))) } - pub fn parse_authority( - input: &'a str, - ) -> IResult<&'a str, ((Option<&str>, Option<&str>), Option<&str>, Option<&str>)> { - let original = input; - + fn parse_authority( + input: &'url str, + ) -> IResult<&'url str, ((Option<&str>, Option<&str>), Option<&str>, Option)> { // Optional: username / token let (input, userinfo) = Self::parse_userinfo(input)?; // Host + + // peek ahead to check for windows path stuff + let check = context( + "Host check for windows path", + peek(preceded( + take_while(|c| reg_name_uri_chars(c) && c != '\\'), + tag::<&str, &str, nom::error::Error<&str>>(":\\"), + )), + ) + .parse(input); + + if check.is_ok() { + return Ok((input, (userinfo, None, None))); + } + let (input, host) = context( "Host parser", opt(verify( @@ -646,9 +417,9 @@ impl<'a> GitUrl<'a> { Ok((input, (userinfo, host, port))) } - pub fn parse_userinfo( - authority_input: &'a str, - ) -> IResult<&'a str, (Option<&'a str>, Option<&'a str>)> { + fn parse_userinfo( + authority_input: &'url str, + ) -> IResult<&'url str, (Option<&'url str>, Option<&'url str>)> { // Peek for username@ let mut check = context( "Userinfo validation", @@ -701,19 +472,25 @@ impl<'a> GitUrl<'a> { Ok((authority_input, (user, token))) } - pub fn parse_port(authority_input: &'a str) -> IResult<&'a str, Option<&'a str>> { - 
context("Port parser", opt(preceded(tag(":"), digit1))).parse(authority_input) + fn parse_port(authority_input: &'url str) -> IResult<&'url str, Option> { + context( + "Port parser", + opt(map_opt(preceded(tag(":"), digit1), |s: &str| { + s.parse::().ok() + })), + ) + .parse(authority_input) } // This will get absolute paths. // todo: test for empty and start with "//" - pub fn path_abempty_parser( + fn path_abempty_parser( ) -> impl Parser< - &'a str, - Output = > as Parser< - &'a str, + &'url str, + Output = > as Parser< + &'url str, >>::Output, - Error = nom::error::Error<&'a str>, + Error = nom::error::Error<&'url str>, >{ // Starts with '/' or empty context( @@ -725,13 +502,13 @@ impl<'a> GitUrl<'a> { ) } - pub fn path_ssh_parser( + fn path_ssh_parser( ) -> impl Parser< - &'a str, - Output = > as Parser< - &'a str, + &'url str, + Output = > as Parser< + &'url str, >>::Output, - Error = nom::error::Error<&'a str>, + Error = nom::error::Error<&'url str>, >{ context( "Path parser (ssh)", @@ -743,13 +520,13 @@ impl<'a> GitUrl<'a> { ) } - pub fn path_rootless_parser( + fn path_rootless_parser( ) -> impl Parser< - &'a str, - Output = > as Parser< - &'a str, + &'url str, + Output = > as Parser< + &'url str, >>::Output, - Error = nom::error::Error<&'a str>, + Error = nom::error::Error<&'url str>, >{ context( "Path parser (rootless)", diff --git a/tests/parse.rs b/tests/parse.rs index d4d5a4d..ee58ec8 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -4,15 +4,12 @@ fn ssh_user_ports() { let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!( - parsed.url(), - "ssh://git@host.tld:9999/user/project-name.git" - ); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("host.tld")); - assert_eq!(parsed.port(), Some("9999")); + assert_eq!(parsed.port(), 
Some(9999)); assert_eq!(parsed.path(), Some("user/project-name.git")); assert_eq!(parsed.print_scheme(), true); } @@ -22,7 +19,7 @@ fn ssh_no_scheme_no_user() { let test_url = "host.tld:user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "host.tld:user/project-name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); @@ -38,7 +35,7 @@ fn https_user_bitbucket() { let test_url = "https://user@bitbucket.org/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "https://user@bitbucket.org/user/repo.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("user")); assert_eq!(parsed.token(), None); @@ -53,7 +50,7 @@ fn ssh_user_bitbucket() { let test_url = "git@bitbucket.org:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "git@bitbucket.org:user/repo.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); assert_eq!(parsed.token(), None); @@ -68,10 +65,7 @@ fn https_user_auth_bitbucket() { let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!( - parsed.url(), - "https://x-token-auth:token@bitbucket.org/owner/name.git" - ); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("x-token-auth")); assert_eq!(parsed.token(), Some("token")); @@ -86,7 +80,7 @@ fn https_user_github() { let test_url = "https://user@github.com/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "https://user@github.com/user/repo.git"); + 
assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("user")); assert_eq!(parsed.token(), None); @@ -101,7 +95,7 @@ fn ssh_user_github() { let test_url = "git@github.com:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "git@github.com:user/repo.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); assert_eq!(parsed.token(), None); @@ -116,10 +110,7 @@ fn https_user_auth_github() { let test_url = "https://token:x-oauth-basic@github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!( - parsed.url(), - "https://token:x-oauth-basic@github.com/owner/name.git" - ); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("token")); assert_eq!(parsed.token(), Some("x-oauth-basic")); @@ -134,10 +125,7 @@ fn ssh_user_azure_devops() { let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!( - parsed.url(), - "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName" - ); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); assert_eq!(parsed.token(), None); @@ -152,10 +140,7 @@ fn https_user_azure_devops() { let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!( - parsed.url(), - "https://organization@dev.azure.com/organization/project/_git/repo" - ); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("organization")); assert_eq!(parsed.token(), None); @@ -170,7 +155,7 @@ fn ftp_user() { let test_url = 
"ftp://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "ftp://git@host.tld/user/project-name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ftp")); assert_eq!(parsed.user(), Some("git")); assert_eq!(parsed.token(), None); @@ -185,7 +170,7 @@ fn ftps_user() { let test_url = "ftps://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "ftps://git@host.tld/user/project-name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ftps")); assert_eq!(parsed.user(), Some("git")); assert_eq!(parsed.token(), None); @@ -200,7 +185,7 @@ fn relative_unix_path() { let test_url = "../project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "../project-name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); @@ -215,7 +200,7 @@ fn absolute_unix_path() { let test_url = "/path/to/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "/path/to/project-name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); @@ -231,7 +216,7 @@ fn relative_windows_path() { let test_url = r"..\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), r"..\project-name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); @@ -243,20 +228,20 @@ fn relative_windows_path() { // Can I use `typed-path` to deal with this? 
// Issue #7 - Absolute Windows paths will not parse at all -#[should_panic(expected = "URL parse failed: UnexpectedFormat")] +//#[should_panic(expected = "URL parse failed: UnexpectedFormat")] #[test] fn absolute_windows_path() { let test_url = r"c:\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - - assert_eq!(parsed.url(), "ftps://git@host.tld/user/project-name.git"); - assert_eq!(parsed.scheme(), Some("ftp")); - assert_eq!(parsed.user(), Some("git")); + println!("{parsed:#?}"); + assert_eq!(parsed.to_string(), test_url); + assert_eq!(parsed.scheme(), Some("file")); + assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); - assert_eq!(parsed.host(), Some("host.tld")); + assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/user/project-name.git")); - assert_eq!(parsed.print_scheme(), true); + assert_eq!(parsed.path(), Some(r"c:\project-name.git")); + assert_eq!(parsed.print_scheme(), false); } //// Move test @@ -322,7 +307,7 @@ fn git() { let test_url = "git://github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - assert_eq!(parsed.url(), "git://github.com/owner/name.git"); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("git")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); From f20bfeddc3b0c3a269224a2cea3a031d673247da Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 5 Sep 2025 22:19:18 -0700 Subject: [PATCH 21/32] Added verify steps to parsing --- src/types/mod.rs | 126 +++++++++++++++++++++----------------- src/types/provider/mod.rs | 6 +- tests/parse.rs | 37 ++++++----- 3 files changed, 92 insertions(+), 77 deletions(-) diff --git a/src/types/mod.rs b/src/types/mod.rs index f1c7efc..12b2c15 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -44,7 +44,7 @@ pub struct GitUrl<'url> { #[getset(get_copy = "pub")] port: Option, #[getset(get_copy = "pub")] - path: Option<&'url str>, + path: &'url str, //#[getset(skip)] //url: String, @@ -85,15 +85,15 @@ impl fmt::Display for GitUrl<'_> { }; let (port, path) = match (self.hint(), self.port(), self.path()) { - (GitUrlParseHint::Httplike, Some(port), Some(path)) => { + (GitUrlParseHint::Httplike, Some(port), path) => { (format!(":{port}"), format!("/{path}")) } - (GitUrlParseHint::Httplike, None, Some(path)) => (format!(""), format!("{path}")), - (GitUrlParseHint::Sshlike, Some(port), Some(path)) => { + (GitUrlParseHint::Httplike, None, path) => (format!(""), format!("{path}")), + (GitUrlParseHint::Sshlike, Some(port), path) => { (format!(":{port}"), format!("/{path}")) } - (GitUrlParseHint::Sshlike, None, Some(path)) => (format!(""), format!(":{path}")), - (GitUrlParseHint::Filelike, None, Some(path)) => (format!(""), format!("{path}")), + (GitUrlParseHint::Sshlike, None, path) => (format!(""), format!(":{path}")), + (GitUrlParseHint::Filelike, None, path) => (format!(""), format!("{path}")), _ => (format!(""), format!("")), }; @@ -113,7 +113,7 @@ impl fmt::Debug for GitUrl<'_> { token: Option<&'a str>, host: Option<&'a str>, port: Option, - path: Option<&'a str>, + path: &'a str, } let Self { @@ -176,7 +176,7 @@ impl<'url> GitUrl<'url> { let (user_opt, token_opt) = heir_part.0.0; let host_opt = heir_part.0.1; let port_opt = heir_part.0.2; - let mut path_opt = heir_part.1; + let mut path = heir_part.1; // We will respect whether scheme was initially set let 
print_scheme = scheme.is_some(); @@ -197,20 +197,20 @@ impl<'url> GitUrl<'url> { && token_opt.is_none() && host_opt.is_none() && port_opt.is_none() - && path_opt.is_some() + && !path.is_empty() { // if we only have a path => file GitUrlParseHint::Filelike } else if user_opt.is_some() && token_opt.is_some() { // If we have a user and token => http GitUrlParseHint::Httplike - } else if let Some(path) = path_opt { + } else if path.starts_with(':') { // If path starts with a colon => ssh - if path.starts_with(':') { - GitUrlParseHint::Sshlike - } else { - GitUrlParseHint::Unknown - } + //if path.starts_with(':') { + GitUrlParseHint::Sshlike + //} else { + // GitUrlParseHint::Unknown + //} } else { GitUrlParseHint::Unknown } @@ -224,9 +224,7 @@ impl<'url> GitUrl<'url> { } else { scheme = Some("ssh") } - if let Some(path) = path_opt.as_mut() { - *path = &path[1..]; - } + path = &path[1..]; } if hint == GitUrlParseHint::Filelike { @@ -243,7 +241,7 @@ impl<'url> GitUrl<'url> { token: token_opt, host: host_opt, port: port_opt, - path: path_opt, + path, //url: original, print_scheme, hint, @@ -266,15 +264,15 @@ impl<'url> GitUrl<'url> { fn is_valid(&self) -> bool { // Last chance validation + //println!("{self:#?}"); + // There's an edge case we don't cover: ssh urls using ports + absolute paths // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it // only ssh paths start with ':' if self.hint() != GitUrlParseHint::Sshlike { - if let Some(path) = &self.path { - if path.starts_with(':') { - return false; - } + if self.path.starts_with(':') { + return false; } } @@ -287,18 +285,16 @@ impl<'url> GitUrl<'url> { // if we are filelike, we should only have paths if self.hint() == GitUrlParseHint::Filelike { - if (self.user().is_some() + if self.user().is_some() || self.token().is_some() || self.host().is_some() || self.port().is_some() - || self.path().is_none()) + || self.path().is_empty() { return false; } } - // Anything not None 
should not be empty - true } @@ -327,19 +323,22 @@ impl<'url> GitUrl<'url> { // Must start with alpha character, then alpha/digit/+/-/. context( "Scheme parse", - opt(terminated( - recognize(pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() - || c.is_ascii_digit() - || c == '+' - || c == '-' - || c == '.' - }), - )), - // Not part of spec. We consume the "://" here to more easily manage scheme to be optional - tag("://"), + opt(verify( + terminated( + recognize(pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + )), + // Not part of spec. We consume the "://" here to more easily manage scheme to be optional + tag("://"), + ), + |s: &str| !s.is_empty(), )), ) .parse(input) @@ -355,7 +354,7 @@ impl<'url> GitUrl<'url> { &'url str, ( ((Option<&str>, Option<&str>), Option<&str>, Option), - Option<&'url str>, + &'url str, ), > { let (input, authority) = Self::parse_authority(input)?; @@ -363,16 +362,19 @@ impl<'url> GitUrl<'url> { let (input, part) = context( "Top of path parsers", - alt(( - //preceded(tag("//"), Self::path_abempty_parser()), - Self::path_abempty_parser(), - Self::path_rootless_parser(), - Self::path_ssh_parser(), - )), + verify( + alt(( + //preceded(tag("//"), Self::path_abempty_parser()), + Self::path_abempty_parser(), + Self::path_rootless_parser(), + Self::path_ssh_parser(), + )), + |s: &str| !s.is_empty(), + ), ) .parse(input)?; - Ok((input, (authority, Some(part)))) + Ok((input, (authority, part))) } fn parse_authority( @@ -405,7 +407,7 @@ impl<'url> GitUrl<'url> { let has_alphanum = s.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); let starts_with_alphanum = s.chars().next().is_some_and(|c| is_alphanum(c)); - has_alphanum && starts_with_alphanum + has_alphanum && starts_with_alphanum && !s.is_empty() }, )), ) @@ -436,9 +438,12 @@ impl<'url> GitUrl<'url> { // Userinfo let (authority_input, userinfo) = context( "Userinfo parser", - 
opt(recognize(take_while(|c: char| { - unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' - }))), + opt(verify( + recognize(take_while(|c: char| { + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' + })), + |s: &str| !s.is_empty(), + )), ) .parse(authority_input)?; @@ -455,9 +460,15 @@ impl<'url> GitUrl<'url> { let (_, (user, token)) = context( "Userinfo with colon parser", separated_pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + verify( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + |s: &str| !s.is_empty(), + ), tag(":"), - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + verify( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + |s: &str| !s.is_empty(), + ), ), ) .parse(userinfo)?; @@ -475,9 +486,10 @@ impl<'url> GitUrl<'url> { fn parse_port(authority_input: &'url str) -> IResult<&'url str, Option> { context( "Port parser", - opt(map_opt(preceded(tag(":"), digit1), |s: &str| { - s.parse::().ok() - })), + opt(map_opt( + verify(preceded(tag(":"), digit1), |p_str: &str| !p_str.is_empty()), + |s: &str| s.parse::().ok(), + )), ) .parse(authority_input) } diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 24afd07..6854248 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -32,7 +32,7 @@ impl GenericProvider { impl GitProvider, GitUrlParseError> for GenericProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Some(path), Some(host)) = (url.path(), url.host()) { + if let (path, Some(host)) = (url.path(), url.host()) { if let Ok((_, Some((user, repo)))) = GenericProvider::_get_owner_repo(path) { Ok(GenericProvider { host: host.to_string(), @@ -65,7 +65,7 @@ impl AzureDevOpsProvider { impl GitProvider, GitUrlParseError> for AzureDevOpsProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Some(path), Some(host)) = (url.path(), url.host()) { + if let (path, Some(host)) = 
(url.path(), url.host()) { if let Ok((_, Some((user, repo)))) = AzureDevOpsProvider::_get_user_repo(path) { Ok(AzureDevOpsProvider { host: host.to_string(), @@ -99,7 +99,7 @@ impl GitLabProvider { impl GitProvider, GitUrlParseError> for GitLabProvider { fn from_git_url(url: &GitUrl) -> Result { - if let (Some(path), Some(host)) = (url.path(), url.host()) { + if let (path, Some(host)) = (url.path(), url.host()) { if let Ok((_, Some((user, repo)))) = GitLabProvider::_get_user_repo(path) { Ok(GitLabProvider { host: host.to_string(), diff --git a/tests/parse.rs b/tests/parse.rs index ee58ec8..c6d1543 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -10,7 +10,7 @@ fn ssh_user_ports() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), Some(9999)); - assert_eq!(parsed.path(), Some("user/project-name.git")); + assert_eq!(parsed.path(), "user/project-name.git"); assert_eq!(parsed.print_scheme(), true); } @@ -25,7 +25,7 @@ fn ssh_no_scheme_no_user() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("user/project-name.git")); + assert_eq!(parsed.path(), "user/project-name.git"); assert_eq!(parsed.print_scheme(), false); } @@ -41,7 +41,7 @@ fn https_user_bitbucket() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("bitbucket.org")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/user/repo.git")); + assert_eq!(parsed.path(), "/user/repo.git"); assert_eq!(parsed.print_scheme(), true); } @@ -56,7 +56,7 @@ fn ssh_user_bitbucket() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("bitbucket.org")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("user/repo.git")); + assert_eq!(parsed.path(), "user/repo.git"); assert_eq!(parsed.print_scheme(), false); } @@ -71,7 +71,7 @@ fn https_user_auth_bitbucket() { assert_eq!(parsed.token(), Some("token")); 
assert_eq!(parsed.host(), Some("bitbucket.org")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/owner/name.git")); + assert_eq!(parsed.path(), "/owner/name.git"); assert_eq!(parsed.print_scheme(), true); } @@ -86,7 +86,7 @@ fn https_user_github() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/user/repo.git")); + assert_eq!(parsed.path(), "/user/repo.git"); assert_eq!(parsed.print_scheme(), true); } @@ -101,7 +101,7 @@ fn ssh_user_github() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("user/repo.git")); + assert_eq!(parsed.path(), "user/repo.git"); assert_eq!(parsed.print_scheme(), false); } @@ -116,7 +116,7 @@ fn https_user_auth_github() { assert_eq!(parsed.token(), Some("x-oauth-basic")); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/owner/name.git")); + assert_eq!(parsed.path(), "/owner/name.git"); assert_eq!(parsed.print_scheme(), true); } @@ -131,7 +131,7 @@ fn ssh_user_azure_devops() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("ssh.dev.azure.com")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("v3/CompanyName/ProjectName/RepoName")); + assert_eq!(parsed.path(), "v3/CompanyName/ProjectName/RepoName"); assert_eq!(parsed.print_scheme(), false); } @@ -146,7 +146,7 @@ fn https_user_azure_devops() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("dev.azure.com")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/organization/project/_git/repo")); + assert_eq!(parsed.path(), "/organization/project/_git/repo"); assert_eq!(parsed.print_scheme(), true); } @@ -161,7 +161,7 @@ fn ftp_user() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), 
None); - assert_eq!(parsed.path(), Some("/user/project-name.git")); + assert_eq!(parsed.path(), "/user/project-name.git"); assert_eq!(parsed.print_scheme(), true); } @@ -176,7 +176,7 @@ fn ftps_user() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/user/project-name.git")); + assert_eq!(parsed.path(), "/user/project-name.git"); assert_eq!(parsed.print_scheme(), true); } @@ -185,13 +185,14 @@ fn relative_unix_path() { let test_url = "../project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + println! {"{parsed:#?}"}; assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("../project-name.git")); + assert_eq!(parsed.path(), "../project-name.git"); assert_eq!(parsed.print_scheme(), false); } @@ -200,13 +201,14 @@ fn absolute_unix_path() { let test_url = "/path/to/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + println! {"{parsed:#?}"}; assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/path/to/project-name.git")); + assert_eq!(parsed.path(), "/path/to/project-name.git"); assert_eq!(parsed.print_scheme(), false); } @@ -216,13 +218,14 @@ fn relative_windows_path() { let test_url = r"..\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + println! 
{"{parsed:#?}"}; assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("..\\project-name.git")); + assert_eq!(parsed.path(), "..\\project-name.git"); assert_eq!(parsed.print_scheme(), false); } @@ -240,7 +243,7 @@ fn absolute_windows_path() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some(r"c:\project-name.git")); + assert_eq!(parsed.path(), r"c:\project-name.git"); assert_eq!(parsed.print_scheme(), false); } @@ -313,6 +316,6 @@ fn git() { assert_eq!(parsed.token(), None); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), Some("/owner/name.git")); + assert_eq!(parsed.path(), "/owner/name.git"); assert_eq!(parsed.print_scheme(), true); } From 1cd9d4ff75c20ce907a815415ff3536e5c5ccfd2 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Sat, 6 Sep 2025 12:12:19 -0700 Subject: [PATCH 22/32] Starting cleanup for provider parsers --- src/types/provider/mod.rs | 85 +++++++++---------- tests/provider.rs | 167 ++++++++++++++++++-------------------- 2 files changed, 118 insertions(+), 134 deletions(-) diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 6854248..893d39e 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -1,10 +1,11 @@ -use nom::bytes::complete::{is_not, tag}; -use nom::sequence::separated_pair; -use nom::{IResult, Parser, combinator::opt, combinator::rest}; +use nom::bytes::complete::{is_not, tag, take_until, take_while}; +use nom::sequence::{pair, preceded, separated_pair, terminated}; +use nom::{IResult, Parser, combinator::opt, combinator::recognize, combinator::rest}; use derive_builder::Builder; -use getset::{Getters, Setters}; +use getset::CopyGetters; +use crate::types::GitUrlParseHint; use crate::{GitUrl, GitUrlParseError}; pub trait GitProvider: Clone + std::fmt::Debug { @@ -12,36 +13,28 @@ pub trait GitProvider: Clone + std::fmt::Debug { } // todo: builder and setters be private? 
-#[derive(Debug, PartialEq, Eq, Clone, Builder, Default, Getters, Setters)] -pub struct GenericProvider { - pub host: String, - pub owner: String, - pub repo: String, +#[derive(Debug, PartialEq, Eq, Clone, Builder, Default, CopyGetters)] +#[getset(get_copy = "pub")] +pub struct GenericProvider<'a> { + pub owner: &'a str, + pub repo: &'a str, } -impl GenericProvider { - fn _get_owner_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { +impl<'a> GenericProvider<'a> { + fn parse_path(input: &str) -> IResult<&str, (&str, &str)> { let (input, _) = opt(tag("/")).parse(input)?; - opt(separated_pair(is_not("/"), tag("/"), rest)).parse(input) + separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input) } - // todo pub fn fullname(&self) -> String { format!("{}/{}", self.owner, self.repo) } } -impl GitProvider, GitUrlParseError> for GenericProvider { - fn from_git_url(url: &GitUrl) -> Result { - if let (path, Some(host)) = (url.path(), url.host()) { - if let Ok((_, Some((user, repo)))) = GenericProvider::_get_owner_repo(path) { - Ok(GenericProvider { - host: host.to_string(), - owner: user.to_string(), - repo: repo.to_string(), - }) - } else { - Err(GitUrlParseError::UnexpectedFormat) - } +impl<'a> GitProvider, GitUrlParseError> for GenericProvider<'a> { + fn from_git_url(url: &GitUrl<'a>) -> Result { + let path = (url.path()); + if let Ok((_, (user, repo))) = Self::parse_path(path) { + Ok(GenericProvider { owner: user, repo }) } else { Err(GitUrlParseError::UnexpectedFormat) } @@ -50,28 +43,26 @@ impl GitProvider, GitUrlParseError> for GenericProvider { // todo: builder, optional #[derive(Clone, Debug, PartialEq, Eq, Default)] -pub struct AzureDevOpsProvider { - pub host: String, - pub org: String, - pub project: String, - pub repo: String, +pub struct AzureDevOpsProvider<'a> { + pub org: &'a str, + pub project: &'a str, + pub repo: &'a str, } -impl AzureDevOpsProvider { +impl<'a> AzureDevOpsProvider<'a> { fn _get_user_repo(input: &str) -> IResult<&str, 
Option<(&str, &str)>> { let (n, _) = opt(tag("/")).parse(input)?; opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) } } -impl GitProvider, GitUrlParseError> for AzureDevOpsProvider { - fn from_git_url(url: &GitUrl) -> Result { +impl<'a> GitProvider, GitUrlParseError> for AzureDevOpsProvider<'a> { + fn from_git_url(url: &GitUrl<'a>) -> Result { if let (path, Some(host)) = (url.path(), url.host()) { if let Ok((_, Some((user, repo)))) = AzureDevOpsProvider::_get_user_repo(path) { Ok(AzureDevOpsProvider { - host: host.to_string(), - org: String::from(""), - project: String::from(user), - repo: String::from(repo), + org: "", + project: user, + repo: repo, }) } else { Err(GitUrlParseError::UnexpectedFormat) @@ -84,28 +75,26 @@ impl GitProvider, GitUrlParseError> for AzureDevOpsProvider { // todo: builder, optional #[derive(Clone, Debug, PartialEq, Eq, Default)] -pub struct GitLabProvider { - pub host: String, - pub user: String, - pub subgroup: Option>, - pub repo: String, +pub struct GitLabProvider<'a> { + pub user: &'a str, + pub subgroup: Option>, + pub repo: &'a str, } -impl GitLabProvider { +impl<'a> GitLabProvider<'a> { fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { let (n, _) = opt(tag("/")).parse(input)?; opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) } } -impl GitProvider, GitUrlParseError> for GitLabProvider { - fn from_git_url(url: &GitUrl) -> Result { +impl<'a> GitProvider, GitUrlParseError> for GitLabProvider<'a> { + fn from_git_url(url: &GitUrl<'a>) -> Result { if let (path, Some(host)) = (url.path(), url.host()) { if let Ok((_, Some((user, repo)))) = GitLabProvider::_get_user_repo(path) { Ok(GitLabProvider { - host: host.to_string(), - user: String::from(""), + user: "", subgroup: None, - repo: String::from(repo), + repo: repo, }) } else { Err(GitUrlParseError::UnexpectedFormat) diff --git a/tests/provider.rs b/tests/provider.rs index 8e07a08..b33811f 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ 
-14,9 +14,8 @@ fn http_generic_git() { let provider_info: GenericProvider = parsed.provider_info().unwrap(); let expected = GenericProvider { - host: "github.com".to_string(), - owner: "tjtelan".to_string(), - repo: "git-url-parse-rs".to_string(), + owner: "tjtelan", + repo: "git-url-parse-rs", }; assert_eq!(provider_info, expected) } @@ -28,9 +27,8 @@ fn ssh_generic_git() { let provider_info: GenericProvider = parsed.provider_info().unwrap(); let expected = GenericProvider { - host: "github.com".to_string(), - owner: "tjtelan".to_string(), - repo: "git-url-parse-rs".to_string(), + owner: "tjtelan", + repo: "git-url-parse-rs", }; assert_eq!(provider_info, expected) } @@ -60,9 +58,8 @@ fn self_host() { let provider_info: GenericProvider = parsed.provider_info().unwrap(); let expected = GenericProvider { - host: "git.example.com:3000".to_string(), - owner: "user".to_string(), - repo: "repo".to_string(), + owner: "user", + repo: "repo", }; assert_eq!(provider_info, expected) } @@ -78,10 +75,9 @@ fn http_azure_devops() { let provider_info: types::AzureDevOpsProvider = parsed.provider_info().unwrap(); let expected = types::AzureDevOpsProvider { - host: "dev.azure.com".to_string(), - org: "CompanyName".to_string(), - project: "ProjectName".to_string(), - repo: "RepoName".to_string(), + org: "CompanyName", + project: "ProjectName", + repo: "RepoName", }; assert_eq!(provider_info, expected) } @@ -93,85 +89,84 @@ fn ssh_azure_devops() { let provider_info: types::AzureDevOpsProvider = parsed.provider_info().unwrap(); let expected = types::AzureDevOpsProvider { - host: "ssh.dev.azure.com".to_string(), - org: "CompanyName".to_string(), - project: "ProjectName".to_string(), - repo: "RepoName".to_string(), - }; - assert_eq!(provider_info, expected) -} - -// GitLab -// https://docs.gitlab.com/topics/git/clone/#clone-with-ssh -// https://gitlab.com/explore/projects/trending?sort=latest_activity_desc -// https://gitlab.com/redhat/red-hat-ci-tools/kernel -#[test] -fn http_gitlab() { 
- let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - host: "gitlab.com".to_string(), - user: "gitlab-org".to_string(), - subgroup: None, - repo: "gitlab".to_string(), - }; - assert_eq!(provider_info, expected) -} - -#[test] -fn ssh_gitlab() { - let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - host: "gitlab.com".to_string(), - user: "gitlab-org".to_string(), - subgroup: None, - repo: "gitlab".to_string(), - }; - assert_eq!(provider_info, expected) -} - -#[test] -fn http_gitlab_subgroups() { - let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - host: "gitlab.com".to_string(), - user: "gitlab-org".to_string(), - subgroup: Some(vec!["sbom".to_string(), "systems".to_string()]), - repo: "gitlab-core".to_string(), - }; - assert_eq!(provider_info, expected) -} - -#[test] -fn ssh_gitlab_subgroups() { - let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - host: "gitlab.com".to_string(), - user: "gitlab-org".to_string(), - subgroup: Some(vec!["sbom".to_string(), "systems".to_string()]), - repo: "gitlab-core".to_string(), + org: "CompanyName", + project: "ProjectName", + repo: "RepoName", }; assert_eq!(provider_info, expected) } +//// GitLab +//// 
https://docs.gitlab.com/topics/git/clone/#clone-with-ssh +//// https://gitlab.com/explore/projects/trending?sort=latest_activity_desc +//// https://gitlab.com/redhat/red-hat-ci-tools/kernel //#[test] -//fn filepath() { -// let test_url = "file:///home/user/Documents/"; +//fn http_gitlab() { +// let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; // let parsed = GitUrl::parse(test_url).expect("URL parse failed"); // -// assert!(parsed.provider().is_none()); +// let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); +// let expected = types::GitLabProvider { +// host: "gitlab.com", +// user: "gitlab-org", +// subgroup: None, +// repo: "gitlab", +// }; +// assert_eq!(provider_info, expected) +//} // -// let provider_info: Result = parsed.provider_info(); -// assert!(provider_info.is_err()) +//#[test] +//fn ssh_gitlab() { +// let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; +// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +// +// let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); +// let expected = types::GitLabProvider { +// host: "gitlab.com", +// user: "gitlab-org", +// subgroup: None, +// repo: "gitlab", +// }; +// assert_eq!(provider_info, expected) //} +// +//#[test] +//fn http_gitlab_subgroups() { +// let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; +// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +// +// let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); +// let expected = types::GitLabProvider { +// host: "gitlab.com", +// user: "gitlab-org", +// subgroup: Some(vec!["sbom", "systems"]), +// repo: "gitlab-core", +// }; +// assert_eq!(provider_info, expected) +//} +// +//#[test] +//fn ssh_gitlab_subgroups() { +// let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; +// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +// +// let provider_info: types::GitLabProvider = 
parsed.provider_info().unwrap(); +// let expected = types::GitLabProvider { +// host: "gitlab.com", +// user: "gitlab-org", +// subgroup: Some(vec!["sbom", "systems"]), +// repo: "gitlab-core", +// }; +// assert_eq!(provider_info, expected) +//} +// +////#[test] +////fn filepath() { +//// let test_url = "file:///home/user/Documents/"; +//// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +//// +//// assert!(parsed.provider().is_none()); +//// +//// let provider_info: Result = parsed.provider_info(); +//// assert!(provider_info.is_err()) +////} From d84723952ca1717100dc40e277099b126026d8ff Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Sat, 6 Sep 2025 17:48:22 -0700 Subject: [PATCH 23/32] Provider parsing and tests --- src/types/error.rs | 23 ++--- src/types/provider/mod.rs | 181 ++++++++++++++++++++++++++++---------- tests/provider.rs | 144 +++++++++++++++--------------- 3 files changed, 211 insertions(+), 137 deletions(-) diff --git a/src/types/error.rs b/src/types/error.rs index 4c45ae5..476c8f6 100644 --- a/src/types/error.rs +++ b/src/types/error.rs @@ -2,26 +2,10 @@ use thiserror::Error; #[derive(Error, Debug)] pub enum GitUrlParseError { - //#[error("Error from derive_builder")] - //DeriveBuilderError(#[from] GitUrlOldBuilderError), + #[error("Nom parsing error: {0}")] + NomParseError(String), - //#[error("Error from Url crate: {0}")] - //UrlParseError(#[from] url::ParseError), - //#[error("No url scheme was found, then failed to normalize as ssh url.")] - //SshUrlNormalizeFailedNoScheme, - - //#[error("No url scheme was found, then failed to normalize as ssh url after adding 'ssh://'")] - //SshUrlNormalizeFailedSchemeAdded, - - //#[error("Failed to normalize as ssh url after adding 'ssh://'")] - //SshUrlNormalizeFailedSchemeAddedWithPorts, - - //#[error("No url scheme was found, then failed to normalize as file url.")] - //FileUrlNormalizeFailedNoScheme, - - //#[error("No url scheme was found, then failed to normalize as file url after 
adding 'file://'")] - //FileUrlNormalizeFailedSchemeAdded, #[error("Git Url not in expected format")] UnexpectedFormat, @@ -29,6 +13,9 @@ pub enum GitUrlParseError { #[error("Git Url for host using unexpected scheme")] UnexpectedScheme, + #[error("Git Url not supported by provider")] + ProviderUnsupported, + //#[error("Scheme unsupported: {0}")] //UnsupportedScheme(String), //#[error("Host from Url cannot be str or does not exist")] diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 893d39e..c733e0c 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -3,9 +3,9 @@ use nom::sequence::{pair, preceded, separated_pair, terminated}; use nom::{IResult, Parser, combinator::opt, combinator::recognize, combinator::rest}; use derive_builder::Builder; -use getset::CopyGetters; +use getset::{CloneGetters, CopyGetters}; -use crate::types::GitUrlParseHint; +use crate::types::{GitUrlParseHint, is_alphanum, provider}; use crate::{GitUrl, GitUrlParseError}; pub trait GitProvider: Clone + std::fmt::Debug { @@ -20,9 +20,20 @@ pub struct GenericProvider<'a> { pub repo: &'a str, } impl<'a> GenericProvider<'a> { - fn parse_path(input: &str) -> IResult<&str, (&str, &str)> { - let (input, _) = opt(tag("/")).parse(input)?; - separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input) + fn parse_path(input: &str) -> Result<(&str, GenericProvider), GitUrlParseError> { + let parse_result = || -> IResult<&str, GenericProvider> { + let (input, _) = opt(tag("/")).parse(input)?; + let (input, (user, repo)) = + separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input)?; + Ok((input, GenericProvider { owner: user, repo })) + }; + + parse_result().map_err(|e| match e { + nom::Err::Error(err) | nom::Err::Failure(err) => { + GitUrlParseError::NomParseError(err.to_string()) + } + nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, + }) } pub fn fullname(&self) -> String { @@ -32,75 +43,155 @@ impl<'a> GenericProvider<'a> { 
impl<'a> GitProvider, GitUrlParseError> for GenericProvider<'a> { fn from_git_url(url: &GitUrl<'a>) -> Result { - let path = (url.path()); - if let Ok((_, (user, repo))) = Self::parse_path(path) { - Ok(GenericProvider { owner: user, repo }) - } else { - Err(GitUrlParseError::UnexpectedFormat) + if url.hint() == GitUrlParseHint::Filelike { + return Err(GitUrlParseError::ProviderUnsupported) } + + let path = url.path(); + Self::parse_path(path).map(|(_, provider)| provider) } } -// todo: builder, optional -#[derive(Clone, Debug, PartialEq, Eq, Default)] +#[derive(Clone, Debug, PartialEq, Eq, Default, CopyGetters)] +#[getset(get_copy = "pub")] pub struct AzureDevOpsProvider<'a> { pub org: &'a str, pub project: &'a str, pub repo: &'a str, } impl<'a> AzureDevOpsProvider<'a> { - fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { - let (n, _) = opt(tag("/")).parse(input)?; - opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) + fn parse_http_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { + let parse_result = || -> IResult<&str, AzureDevOpsProvider> { + // Handle optional leading / + let (input, _) = opt(tag("/")).parse(input)?; + + // Parse org/project/repo + let (input, (org, (project, repo))) = separated_pair( + is_not("/"), + tag("/"), + separated_pair( + is_not("/"), + tag("/"), + preceded(opt(tag("_git/")), is_not("")), + ), + ) + .parse(input)?; + + Ok((input, AzureDevOpsProvider { org, project, repo })) + }; + + parse_result().map_err(|e| match e { + nom::Err::Error(err) | nom::Err::Failure(err) => { + GitUrlParseError::NomParseError(err.to_string()) + } + nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, + }) + } + fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { + let parse_result = || -> IResult<&str, AzureDevOpsProvider> { + // Handle optional leading v3/ or other prefix + let (input, _) = opt(take_until("/")).parse(input)?; + let (input, _) = 
opt(tag("/")).parse(input)?; + + // Parse org/project/repo + let (input, (org, (project, repo))) = separated_pair( + is_not("/"), + tag("/"), + separated_pair( + is_not("/"), + tag("/"), + terminated(is_not("."), opt(tag(".git"))), + ), + ) + .parse(input)?; + + Ok((input, AzureDevOpsProvider { org, project, repo })) + }; + + parse_result().map_err(|e| match e { + nom::Err::Error(err) | nom::Err::Failure(err) => { + GitUrlParseError::NomParseError(err.to_string()) + } + nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, + }) } } impl<'a> GitProvider, GitUrlParseError> for AzureDevOpsProvider<'a> { fn from_git_url(url: &GitUrl<'a>) -> Result { - if let (path, Some(host)) = (url.path(), url.host()) { - if let Ok((_, Some((user, repo)))) = AzureDevOpsProvider::_get_user_repo(path) { - Ok(AzureDevOpsProvider { - org: "", - project: user, - repo: repo, - }) - } else { - Err(GitUrlParseError::UnexpectedFormat) - } + let path = url.path(); + + let parsed = if url.hint() == GitUrlParseHint::Httplike { + Self::parse_http_path(path) } else { - Err(GitUrlParseError::UnexpectedFormat) - } + Self::parse_ssh_path(path) + }; + + parsed.map(|(_, provider)| provider) } } -// todo: builder, optional -#[derive(Clone, Debug, PartialEq, Eq, Default)] +#[derive(Clone, Debug, PartialEq, Eq, Default, CopyGetters, CloneGetters)] pub struct GitLabProvider<'a> { + #[getset(get_copy = "pub")] pub user: &'a str, + #[getset(get_clone = "pub")] pub subgroup: Option>, + #[getset(get_copy = "pub")] pub repo: &'a str, } impl<'a> GitLabProvider<'a> { - fn _get_user_repo(input: &str) -> IResult<&str, Option<(&str, &str)>> { - let (n, _) = opt(tag("/")).parse(input)?; - opt(separated_pair(is_not("/"), tag("/"), rest)).parse(n) + fn parse_path(input: &str) -> Result<(&str, GitLabProvider), GitUrlParseError> { + let parse_result = || -> IResult<&str, GitLabProvider> { + // Optional leading slash + let (input, _) = opt(tag("/")).parse(input)?; + + // Remove .git extension if present + let 
input = input.trim_end_matches(".git"); + + // Split the path + let parts: Vec<&str> = input.split('/').filter(|s| !s.is_empty()).collect(); + + // Ensure we have at least 2 parts (owner and repo) + if parts.len() < 2 { + return Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::Fail, + ))); + } + + // Last part is the repo + let repo = parts[parts.len() - 1]; + + // Everything before the last part is the owner/subgroups + let (user, subgroup) = if parts.len() > 2 { + (parts[0], Some(parts[1..parts.len() - 1].to_vec())) + } else { + (parts[0], None) + }; + + Ok(( + input, + GitLabProvider { + user, + subgroup, + repo, + }, + )) + }; + + parse_result().map_err(|e| match e { + nom::Err::Error(err) | nom::Err::Failure(err) => { + GitUrlParseError::NomParseError(err.to_string()) + } + nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, + }) } } impl<'a> GitProvider, GitUrlParseError> for GitLabProvider<'a> { fn from_git_url(url: &GitUrl<'a>) -> Result { - if let (path, Some(host)) = (url.path(), url.host()) { - if let Ok((_, Some((user, repo)))) = GitLabProvider::_get_user_repo(path) { - Ok(GitLabProvider { - user: "", - subgroup: None, - repo: repo, - }) - } else { - Err(GitUrlParseError::UnexpectedFormat) - } - } else { - Err(GitUrlParseError::UnexpectedFormat) - } + let path = url.path(); + Self::parse_path(path).map(|(_, provider)| provider) } } diff --git a/tests/provider.rs b/tests/provider.rs index b33811f..e674771 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -96,77 +96,73 @@ fn ssh_azure_devops() { assert_eq!(provider_info, expected) } -//// GitLab -//// https://docs.gitlab.com/topics/git/clone/#clone-with-ssh -//// https://gitlab.com/explore/projects/trending?sort=latest_activity_desc -//// https://gitlab.com/redhat/red-hat-ci-tools/kernel -//#[test] -//fn http_gitlab() { -// let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// -// 
let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); -// let expected = types::GitLabProvider { -// host: "gitlab.com", -// user: "gitlab-org", -// subgroup: None, -// repo: "gitlab", -// }; -// assert_eq!(provider_info, expected) -//} -// -//#[test] -//fn ssh_gitlab() { -// let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// -// let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); -// let expected = types::GitLabProvider { -// host: "gitlab.com", -// user: "gitlab-org", -// subgroup: None, -// repo: "gitlab", -// }; -// assert_eq!(provider_info, expected) -//} -// -//#[test] -//fn http_gitlab_subgroups() { -// let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// -// let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); -// let expected = types::GitLabProvider { -// host: "gitlab.com", -// user: "gitlab-org", -// subgroup: Some(vec!["sbom", "systems"]), -// repo: "gitlab-core", -// }; -// assert_eq!(provider_info, expected) -//} -// -//#[test] -//fn ssh_gitlab_subgroups() { -// let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; -// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -// -// let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); -// let expected = types::GitLabProvider { -// host: "gitlab.com", -// user: "gitlab-org", -// subgroup: Some(vec!["sbom", "systems"]), -// repo: "gitlab-core", -// }; -// assert_eq!(provider_info, expected) -//} -// -////#[test] -////fn filepath() { -//// let test_url = "file:///home/user/Documents/"; -//// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -//// -//// assert!(parsed.provider().is_none()); -//// -//// let provider_info: Result = parsed.provider_info(); -//// assert!(provider_info.is_err()) -////} +// 
GitLab +// https://docs.gitlab.com/topics/git/clone/#clone-with-ssh +// https://gitlab.com/explore/projects/trending?sort=latest_activity_desc +// https://gitlab.com/redhat/red-hat-ci-tools/kernel +#[test] +fn http_gitlab() { + let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { + user: "gitlab-org", + subgroup: None, + repo: "gitlab", + }; + assert_eq!(provider_info, expected) +} + +#[test] +fn ssh_gitlab() { + let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { + user: "gitlab-org", + subgroup: None, + repo: "gitlab", + }; + assert_eq!(provider_info, expected) +} + +#[test] +fn http_gitlab_subgroups() { + let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { + user: "gitlab-org", + subgroup: Some(vec!["sbom", "systems"]), + repo: "gitlab-core", + }; + assert_eq!(provider_info, expected) +} + +#[test] +fn ssh_gitlab_subgroups() { + let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); + let expected = types::GitLabProvider { + user: "gitlab-org", + subgroup: Some(vec!["sbom", "systems"]), + repo: "gitlab-core", + }; + assert_eq!(provider_info, expected) +} + +#[test] +fn filepath() { + let test_url = "file:///home/user/Documents/"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + + 
//assert!(parsed.provider_info::().is_ok()); + + let provider_info: Result = parsed.provider_info(); + assert!(provider_info.is_err()) +} From a77de4edb82a9c87e49f4d5da0a8e65cd97a246e Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Sun, 7 Sep 2025 22:14:40 -0700 Subject: [PATCH 24/32] Move raw url spec parsing into module --- Cargo.toml | 20 +- src/lib.rs | 5 +- src/types/error.rs | 17 -- src/types/mod.rs | 465 ++++++-------------------------------- src/types/provider/mod.rs | 29 +-- src/types/spec.rs | 458 +++++++++++++++++++++++++++++++++++++ tests/parse.rs | 99 ++++---- tests/provider.rs | 82 +++++-- tests/trim_auth.rs | 198 ++++++++-------- 9 files changed, 755 insertions(+), 618 deletions(-) create mode 100644 src/types/spec.rs diff --git a/Cargo.toml b/Cargo.toml index 89f7faa..05e70b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] authors = ["T.J. Telan "] -categories = ["parser-implementations", "encoding"] -description = "A parser for git repo urls based on url crate" +categories = ["parser-implementations"] +description = "A parser for git repo urls" documentation = "https://docs.rs/git-url-parse" edition = "2024" -keywords = ["git", "url", "parsing", "normalize"] +keywords = ["git", "url", "parsing"] license = "MIT" name = "git-url-parse" readme = "README.md" @@ -13,7 +13,7 @@ version = "0.4.6" rust-version = "1.82" [features] -default = ["tracing"] # Do not keep tracing default +default = [] tracing = ["dep:tracing"] #filepath #validate @@ -21,14 +21,14 @@ tracing = ["dep:tracing"] [dependencies] tracing = { version = "0.1", optional = true } -#url = { version = "2.2" } -#strum = { version = "0.27", features = ["derive"] } -thiserror = "2" - nom = "8" -derive_builder = "0.20" +thiserror = "2" getset = "0.1.6" -typed-path = "0.11.0" +url = { version = "2.5" } +#strum = { version = "0.27", features = ["derive"] } + +#derive_builder = "0.20" +#typed-path = "0.11.0" [dev-dependencies] env_logger = "0.11" diff --git a/src/lib.rs 
b/src/lib.rs index 2f5351d..1c40aec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,4 @@ pub mod types; // Re-exports -pub use types::{GenericProvider, GitProvider, GitUrl, GitUrlParseError}; - -#[cfg(feature = "tracing")] -use tracing::debug; +pub use types::{GitUrl, GitUrlParseError}; diff --git a/src/types/error.rs b/src/types/error.rs index 476c8f6..4efad89 100644 --- a/src/types/error.rs +++ b/src/types/error.rs @@ -5,29 +5,12 @@ pub enum GitUrlParseError { #[error("Nom parsing error: {0}")] NomParseError(String), - #[error("Git Url not in expected format")] UnexpectedFormat, - // FIXME: Keep an eye on this error for removal - #[error("Git Url for host using unexpected scheme")] - UnexpectedScheme, - #[error("Git Url not supported by provider")] ProviderUnsupported, - //#[error("Scheme unsupported: {0}")] - //UnsupportedScheme(String), - //#[error("Host from Url cannot be str or does not exist")] - //UnsupportedUrlHostFormat, - //#[error("Git Url not in expected format for SSH")] - //UnsupportedSshUrlFormat, - //#[error("Normalized URL has no path")] - //EmptyPath, #[error("Found null bytes within input url before parsing")] FoundNullBytes, - - // Maybe remove this. 
Handled by derive_builder - #[error("Value expected for field: {0}")] - UnexpectedEmptyValue(String), } diff --git a/src/types/mod.rs b/src/types/mod.rs index 12b2c15..d0f0f29 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,26 +1,18 @@ mod error; -mod provider; +mod spec; +use spec::*; +pub mod provider; pub use error::GitUrlParseError; -pub use provider::{AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider}; use core::str; use std::fmt; use getset::{CloneGetters, CopyGetters, Setters}; use nom::Finish; -use nom::branch::alt; -use nom::bytes::complete::{tag, take_while}; -use nom::character::complete::{alpha1, digit1}; -use nom::combinator::{map_opt, peek, recognize, verify}; -use nom::error::context; -use nom::multi::{many0, many1}; -use nom::sequence::{pair, preceded, separated_pair, terminated}; -use nom::{IResult, Parser, combinator::opt}; #[cfg(feature = "tracing")] use tracing::debug; -use typed_path::{Utf8TypedPath, Utf8TypedPathBuf}; #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub(crate) enum GitUrlParseHint { @@ -31,7 +23,7 @@ pub(crate) enum GitUrlParseHint { Httplike, } -#[derive(Clone, CopyGetters, CloneGetters, Setters, Default, PartialEq, Eq)] +#[derive(Clone, CopyGetters, CloneGetters, Debug, Default, Setters, PartialEq, Eq)] pub struct GitUrl<'url> { #[getset(get_copy = "pub", set = "pub(crate)")] scheme: Option<&'url str>, @@ -45,18 +37,11 @@ pub struct GitUrl<'url> { port: Option, #[getset(get_copy = "pub")] path: &'url str, - - //#[getset(skip)] - //url: String, /// Include scheme:// when printing url #[getset(get_copy = "pub")] print_scheme: bool, - #[getset(get_copy = "pub(crate)")] hint: GitUrlParseHint, - ///// Hosted git provider info derived from GitUrl - //#[getset(skip)] - //provider: Option

, } /// Build the printable GitUrl from its components @@ -88,13 +73,13 @@ impl fmt::Display for GitUrl<'_> { (GitUrlParseHint::Httplike, Some(port), path) => { (format!(":{port}"), format!("/{path}")) } - (GitUrlParseHint::Httplike, None, path) => (format!(""), format!("{path}")), + (GitUrlParseHint::Httplike, None, path) => (String::new(), path.to_string()), (GitUrlParseHint::Sshlike, Some(port), path) => { (format!(":{port}"), format!("/{path}")) } - (GitUrlParseHint::Sshlike, None, path) => (format!(""), format!(":{path}")), - (GitUrlParseHint::Filelike, None, path) => (format!(""), format!("{path}")), - _ => (format!(""), format!("")), + (GitUrlParseHint::Sshlike, None, path) => (String::new(), format!(":{path}")), + (GitUrlParseHint::Filelike, None, path) => (String::new(), path.to_string()), + _ => (String::new(), String::new()), }; let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); @@ -103,45 +88,6 @@ impl fmt::Display for GitUrl<'_> { } } -// This is to hide `url` from debug output -impl fmt::Debug for GitUrl<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { - #[derive(Debug)] - struct GitUrl<'a> { - scheme: Option<&'a str>, - user: Option<&'a str>, - token: Option<&'a str>, - host: Option<&'a str>, - port: Option, - path: &'a str, - } - - let Self { - //url: _, - scheme, - user, - token, - host, - port, - path, - print_scheme: _, - hint: _, - } = self; - - fmt::Debug::fmt( - &GitUrl { - scheme: *scheme, - user: *user, - token: *token, - host: *host, - port: *port, - path: *path, - }, - f, - ) - } -} - impl<'url> GitUrl<'url> { /// Returns `GitUrl` after removing `user` and `token` values /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info @@ -149,18 +95,11 @@ impl<'url> GitUrl<'url> { let mut new_giturl = self.clone(); new_giturl.set_user(None); new_giturl.set_token(None); + #[cfg(feature = "tracing")] + debug!(?new_giturl); new_giturl } - // 
https://datatracker.ietf.org/doc/html/rfc3986 - // Based on rfc3986, but does not strictly cover the spec - // * No support for: - // * query, fragment, percent-encoding, and much of the edges for path support - // * many forms of ip representations like ipv6, hexdigits - // * Added support for: - // * parsing ssh git urls which use ":" as a delimiter between the authority and path - // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) - // * some limited support for windows/linux filepaths pub fn parse(input: &'url str) -> Result { // Error if there are null bytes within the url // https://github.com/tjtelan/git-url-parse-rs/issues/16 @@ -168,15 +107,14 @@ impl<'url> GitUrl<'url> { return Err(GitUrlParseError::FoundNullBytes); } - //let original = input.to_string(); + let (_input, url_spec_parser) = UrlSpecParser::parse(input).finish().unwrap_or_default(); - let (input, mut scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); - let (_input, heir_part) = Self::parse_hier_part(input).finish().unwrap_or_default(); - - let (user_opt, token_opt) = heir_part.0.0; - let host_opt = heir_part.0.1; - let port_opt = heir_part.0.2; - let mut path = heir_part.1; + let mut scheme = url_spec_parser.scheme(); + let user = url_spec_parser.heir_part().authority().userinfo().user(); + let token = url_spec_parser.heir_part().authority().userinfo().token(); + let host = url_spec_parser.heir_part().authority().host(); + let port = url_spec_parser.heir_part().authority().port(); + let mut path = url_spec_parser.heir_part().path(); // We will respect whether scheme was initially set let print_scheme = scheme.is_some(); @@ -192,28 +130,22 @@ impl<'url> GitUrl<'url> { _ => GitUrlParseHint::Httplike, } } + } else if user.is_none() + && token.is_none() + && host.is_none() + && port.is_none() + && !path.is_empty() + { + // if we only have a path => file + GitUrlParseHint::Filelike + } else if user.is_some() && token.is_some() { + // If 
we have a user and token => http + GitUrlParseHint::Httplike + } else if path.starts_with(':') { + // If path starts with a colon => ssh + GitUrlParseHint::Sshlike } else { - if user_opt.is_none() - && token_opt.is_none() - && host_opt.is_none() - && port_opt.is_none() - && !path.is_empty() - { - // if we only have a path => file - GitUrlParseHint::Filelike - } else if user_opt.is_some() && token_opt.is_some() { - // If we have a user and token => http - GitUrlParseHint::Httplike - } else if path.starts_with(':') { - // If path starts with a colon => ssh - //if path.starts_with(':') { - GitUrlParseHint::Sshlike - //} else { - // GitUrlParseHint::Unknown - //} - } else { - GitUrlParseHint::Unknown - } + GitUrlParseHint::Unknown }; // If we found an ssh url, we should adjust the path. @@ -237,12 +169,11 @@ impl<'url> GitUrl<'url> { let git_url = GitUrl { scheme, - user: user_opt, - token: token_opt, - host: host_opt, - port: port_opt, + user, + token, + host, + port, path, - //url: original, print_scheme, hint, }; @@ -256,7 +187,7 @@ impl<'url> GitUrl<'url> { pub fn provider_info(&self) -> Result where - T: GitProvider, GitUrlParseError>, + T: provider::GitProvider, GitUrlParseError>, { T::from_git_url(self) } @@ -264,320 +195,56 @@ impl<'url> GitUrl<'url> { fn is_valid(&self) -> bool { // Last chance validation + //if url::Url::parse(&self.to_string()).is_err() { + // return false + //} + //println!("{self:#?}"); // There's an edge case we don't cover: ssh urls using ports + absolute paths // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it // only ssh paths start with ':' - if self.hint() != GitUrlParseHint::Sshlike { - if self.path.starts_with(':') { - return false; + if self.hint() != GitUrlParseHint::Sshlike && self.path.starts_with(':') { + #[cfg(feature = "tracing")] + { + debug!("{}", self.hint()); + debug!(self.path); + debug!("Only sshlike url path starts with ':'"); + debug!("path starts with ':'?", 
self.path.starts_with(':')); } + + return false; } // if we are not httplike, we shouldn't have tokens - if self.hint() != GitUrlParseHint::Httplike { - if self.token().is_some() { - return false; + if self.hint() != GitUrlParseHint::Httplike && self.token().is_some() { + #[cfg(feature = "tracing")] + { + debug!("{}", self.hint()); + debug!("Token support only for httplike url", self.token()); } + return false; } // if we are filelike, we should only have paths - if self.hint() == GitUrlParseHint::Filelike { - if self.user().is_some() + if self.hint() == GitUrlParseHint::Filelike + && (self.user().is_some() || self.token().is_some() || self.host().is_some() || self.port().is_some() - || self.path().is_empty() + || self.path().is_empty()) + { + #[cfg(feature = "tracing")] { - return false; + debug!( + "Only scheme and path expected to have values set for filelike urls", + ?self + ); } + return false; } true } - - fn parse_scheme(input: &'url str) -> IResult<&'url str, Option<&'url str>> { - let mut check = context( - "scheme validate", - peek(pair( - pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() - || c.is_ascii_digit() - || c == '+' - || c == '-' - || c == '.' - }), - ), - tag::<&str, &str, nom::error::Error<&str>>("://"), - )), - ); - - if check.parse(input).is_err() { - return Ok((input, None)); - } - - // Must start with alpha character, then alpha/digit/+/-/. - context( - "Scheme parse", - opt(verify( - terminated( - recognize(pair( - alpha1, - take_while(|c: char| { - c.is_ascii_alphabetic() - || c.is_ascii_digit() - || c == '+' - || c == '-' - || c == '.' - }), - )), - // Not part of spec. 
We consume the "://" here to more easily manage scheme to be optional - tag("://"), - ), - |s: &str| !s.is_empty(), - )), - ) - .parse(input) - } - - // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 - // The rfc says parsing the "//" part of the uri belongs to the hier-part parsing - // but we only support common internet protocols, file paths, but not other "baseless" ones - // so it is sensible for this move it with scheme parsing to support git user service urls - fn parse_hier_part( - input: &'url str, - ) -> IResult< - &'url str, - ( - ((Option<&str>, Option<&str>), Option<&str>, Option), - &'url str, - ), - > { - let (input, authority) = Self::parse_authority(input)?; - //println!("authority: {authority:?}"); - - let (input, part) = context( - "Top of path parsers", - verify( - alt(( - //preceded(tag("//"), Self::path_abempty_parser()), - Self::path_abempty_parser(), - Self::path_rootless_parser(), - Self::path_ssh_parser(), - )), - |s: &str| !s.is_empty(), - ), - ) - .parse(input)?; - - Ok((input, (authority, part))) - } - - fn parse_authority( - input: &'url str, - ) -> IResult<&'url str, ((Option<&str>, Option<&str>), Option<&str>, Option)> { - // Optional: username / token - let (input, userinfo) = Self::parse_userinfo(input)?; - - // Host - - // peek ahead to check for windows path stuff - let check = context( - "Host check for windows path", - peek(preceded( - take_while(|c| reg_name_uri_chars(c) && c != '\\'), - tag::<&str, &str, nom::error::Error<&str>>(":\\"), - )), - ) - .parse(input); - - if check.is_ok() { - return Ok((input, (userinfo, None, None))); - } - - let (input, host) = context( - "Host parser", - opt(verify( - recognize(take_while(|c: char| reg_name_uri_chars(c))), - |s: &str| { - let has_alphanum = s.chars().into_iter().find(|c| is_alphanum(*c)).is_some(); - let starts_with_alphanum = s.chars().next().is_some_and(|c| is_alphanum(c)); - - has_alphanum && starts_with_alphanum && !s.is_empty() - }, - )), - ) - .parse(input)?; - 
- // Optional: port - let (input, port) = Self::parse_port(input)?; - - Ok((input, (userinfo, host, port))) - } - - fn parse_userinfo( - authority_input: &'url str, - ) -> IResult<&'url str, (Option<&'url str>, Option<&'url str>)> { - // Peek for username@ - let mut check = context( - "Userinfo validation", - peek(pair( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), - tag::<&str, &str, nom::error::Error<&str>>("@"), - )), - ); - - if check.parse(authority_input).is_err() { - return Ok((authority_input, (None, None))); - } - - // Userinfo - let (authority_input, userinfo) = context( - "Userinfo parser", - opt(verify( - recognize(take_while(|c: char| { - unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' - })), - |s: &str| !s.is_empty(), - )), - ) - .parse(authority_input)?; - - let (authority_input, _) = if userinfo.is_some() { - context("Userinfo '@' parser", tag("@")).parse(authority_input)? - } else { - // No change to input, but let the compiler be happy - (authority_input, authority_input) - }; - - // Break down userinfo into user and token - let (user, token) = if let Some(userinfo) = userinfo { - if userinfo.contains(":") { - let (_, (user, token)) = context( - "Userinfo with colon parser", - separated_pair( - verify( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - |s: &str| !s.is_empty(), - ), - tag(":"), - verify( - take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), - |s: &str| !s.is_empty(), - ), - ), - ) - .parse(userinfo)?; - (Some(user), Some(token)) - } else { - (Some(userinfo), None) - } - } else { - (None, None) - }; - - Ok((authority_input, (user, token))) - } - - fn parse_port(authority_input: &'url str) -> IResult<&'url str, Option> { - context( - "Port parser", - opt(map_opt( - verify(preceded(tag(":"), digit1), |p_str: &str| !p_str.is_empty()), - |s: &str| s.parse::().ok(), - )), - ) - .parse(authority_input) - } - - // This will get absolute 
paths. - // todo: test for empty and start with "//" - fn path_abempty_parser( - ) -> impl Parser< - &'url str, - Output = > as Parser< - &'url str, - >>::Output, - Error = nom::error::Error<&'url str>, - >{ - // Starts with '/' or empty - context( - "Path parser (abempty)", - recognize(many1(pair( - tag("/"), - take_while(|c: char| pchar_uri_chars(c)), - ))), - ) - } - - fn path_ssh_parser( - ) -> impl Parser< - &'url str, - Output = > as Parser< - &'url str, - >>::Output, - Error = nom::error::Error<&'url str>, - >{ - context( - "Path parser (ssh)", - recognize(( - tag(":"), - take_while(|c: char| pchar_uri_chars(c)), - many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), - )), - ) - } - - fn path_rootless_parser( - ) -> impl Parser< - &'url str, - Output = > as Parser< - &'url str, - >>::Output, - Error = nom::error::Error<&'url str>, - >{ - context( - "Path parser (rootless)", - recognize(pair( - take_while(|c: char| pchar_uri_chars(c)), - many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), - )), - ) - } -} - -fn pchar_uri_chars(c: char) -> bool { - // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" - unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' -} - -fn reg_name_uri_chars(c: char) -> bool { - // *( unreserved / pct-encoded (not implemented) / sub-delims ) - unreserved_uri_chars(c) || subdelims_uri_chars(c) -} -fn unreserved_uri_chars(c: char) -> bool { - is_alphanum(c) || c == '-' || c == '.' || c == '_' || c == '~' -} - -fn is_alphanum(c: char) -> bool { - c.is_ascii_alphabetic() || c.is_ascii_digit() -} - -fn subdelims_uri_chars(c: char) -> bool { - c == '!' 
- || c == '$' - || c == '&' - || c == '\'' - || c == '(' - || c == ')' - || c == '*' - || c == '+' - || c == ',' - || c == ';' - || c == '=' - || c == '\\' // This is not part of spec, but used for windows paths } diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index c733e0c..f0001fe 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -1,24 +1,23 @@ -use nom::bytes::complete::{is_not, tag, take_until, take_while}; -use nom::sequence::{pair, preceded, separated_pair, terminated}; -use nom::{IResult, Parser, combinator::opt, combinator::recognize, combinator::rest}; +use crate::types::GitUrlParseHint; +use crate::{GitUrl, GitUrlParseError}; -use derive_builder::Builder; use getset::{CloneGetters, CopyGetters}; - -use crate::types::{GitUrlParseHint, is_alphanum, provider}; -use crate::{GitUrl, GitUrlParseError}; +use nom::bytes::complete::{is_not, tag, take_until}; +use nom::combinator::opt; +use nom::sequence::{preceded, separated_pair, terminated}; +use nom::{IResult, Parser}; pub trait GitProvider: Clone + std::fmt::Debug { fn from_git_url(url: &T) -> Result; } -// todo: builder and setters be private? 
-#[derive(Debug, PartialEq, Eq, Clone, Builder, Default, CopyGetters)] +#[derive(Debug, PartialEq, Eq, Clone, CopyGetters)] #[getset(get_copy = "pub")] pub struct GenericProvider<'a> { pub owner: &'a str, pub repo: &'a str, } + impl<'a> GenericProvider<'a> { fn parse_path(input: &str) -> Result<(&str, GenericProvider), GitUrlParseError> { let parse_result = || -> IResult<&str, GenericProvider> { @@ -44,7 +43,7 @@ impl<'a> GenericProvider<'a> { impl<'a> GitProvider, GitUrlParseError> for GenericProvider<'a> { fn from_git_url(url: &GitUrl<'a>) -> Result { if url.hint() == GitUrlParseHint::Filelike { - return Err(GitUrlParseError::ProviderUnsupported) + return Err(GitUrlParseError::ProviderUnsupported); } let path = url.path(); @@ -52,13 +51,14 @@ impl<'a> GitProvider, GitUrlParseError> for GenericProvider<'a> { } } -#[derive(Clone, Debug, PartialEq, Eq, Default, CopyGetters)] +#[derive(Debug, PartialEq, Eq, Clone, CopyGetters)] #[getset(get_copy = "pub")] pub struct AzureDevOpsProvider<'a> { pub org: &'a str, pub project: &'a str, pub repo: &'a str, } + impl<'a> AzureDevOpsProvider<'a> { fn parse_http_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { let parse_result = || -> IResult<&str, AzureDevOpsProvider> { @@ -134,12 +134,13 @@ impl<'a> GitProvider, GitUrlParseError> for AzureDevOpsProvider<'a> { #[derive(Clone, Debug, PartialEq, Eq, Default, CopyGetters, CloneGetters)] pub struct GitLabProvider<'a> { #[getset(get_copy = "pub")] - pub user: &'a str, + pub owner: &'a str, #[getset(get_clone = "pub")] pub subgroup: Option>, #[getset(get_copy = "pub")] pub repo: &'a str, } + impl<'a> GitLabProvider<'a> { fn parse_path(input: &str) -> Result<(&str, GitLabProvider), GitUrlParseError> { let parse_result = || -> IResult<&str, GitLabProvider> { @@ -164,7 +165,7 @@ impl<'a> GitLabProvider<'a> { let repo = parts[parts.len() - 1]; // Everything before the last part is the owner/subgroups - let (user, subgroup) = if parts.len() > 2 { + let (owner, 
subgroup) = if parts.len() > 2 { (parts[0], Some(parts[1..parts.len() - 1].to_vec())) } else { (parts[0], None) @@ -173,7 +174,7 @@ impl<'a> GitLabProvider<'a> { Ok(( input, GitLabProvider { - user, + owner, subgroup, repo, }, diff --git a/src/types/spec.rs b/src/types/spec.rs new file mode 100644 index 0000000..791bc4c --- /dev/null +++ b/src/types/spec.rs @@ -0,0 +1,458 @@ +use getset::CopyGetters; +use nom::Finish; +use nom::branch::alt; +use nom::bytes::complete::{tag, take_while}; +use nom::character::complete::{alpha1, digit1}; +use nom::combinator::{map_opt, peek, recognize, verify}; +use nom::error::context; +use nom::multi::{many0, many1}; +use nom::sequence::{pair, preceded, separated_pair, terminated}; +use nom::{IResult, Parser, combinator::opt}; + +#[derive(Debug, Default, Clone, Copy, CopyGetters)] +#[getset(get_copy = "pub")] +pub(crate) struct UrlSpecParser<'url> { + pub(crate) scheme: Option<&'url str>, + pub(crate) heir_part: UrlHeirPart<'url>, +} + +impl<'url> UrlSpecParser<'url> { + // https://datatracker.ietf.org/doc/html/rfc3986 + // Based on rfc3986, but does not strictly cover the spec + // * No support for: + // * query, fragment, percent-encoding, and much of the edges for path support + // * many forms of ip representations like ipv6, hexdigits + // * Added support for: + // * parsing ssh git urls which use ":" as a delimiter between the authority and path + // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) + // * some limited support for windows/linux filepaths + pub(crate) fn parse(input: &'url str) -> IResult<&'url str, Self> { + let (input, scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); + let (input, heir_part) = Self::parse_hier_part(input).finish().unwrap_or_default(); + + let parsed = UrlSpecParser { scheme, heir_part }; + + Ok((input, parsed)) + } + + fn parse_scheme(input: &'url str) -> IResult<&'url str, Option<&'url str>> { + #[cfg(feature = "tracing")] + { + 
debug!("Looking ahead before parsing for scheme"); + } + + let mut check = context( + "scheme validate", + peek(pair( + pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + ), + tag::<&str, &str, nom::error::Error<&str>>("://"), + )), + ); + + if check.parse(input).is_err() { + #[cfg(feature = "tracing")] + { + debug!("Look ahead check for scheme failed", ?self.token()); + } + + return Ok((input, None)); + } + + #[cfg(feature = "tracing")] + { + debug!("Look ahead check passed, parsing for scheme"); + } + + // Must start with alpha character, then alpha/digit/+/-/. + let (input, scheme) = context( + "Scheme parse", + opt(verify( + terminated( + recognize(pair( + alpha1, + take_while(|c: char| { + c.is_ascii_alphabetic() + || c.is_ascii_digit() + || c == '+' + || c == '-' + || c == '.' + }), + )), + // Not part of spec. We consume the "://" here to more easily manage scheme to be optional + tag("://"), + ), + |s: &str| !s.is_empty(), + )), + ) + .parse(input)?; + + #[cfg(feature = "tracing")] + { + debug!(?input); + debug!(?scheme); + } + + Ok((input, scheme)) + } + + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 + // The rfc says parsing the "//" part of the uri belongs to the hier-part parsing + // but we only support common internet protocols, file paths, but not other "baseless" ones + // so it is sensible for this move it with scheme parsing to support git user service urls + fn parse_hier_part(input: &'url str) -> IResult<&'url str, UrlHeirPart<'url>> { + #[cfg(feature = "tracing")] + { + debug!("Parsing for heir-part"); + } + + let (input, authority) = Self::parse_authority(input)?; + + let (input, path) = context( + "Top of path parsers", + verify( + alt(( + //preceded(tag("//"), Self::path_abempty_parser()), + Self::path_abempty_parser(), + Self::path_rootless_parser(), + Self::path_ssh_parser(), + )), + |s: &str| !s.is_empty(), + ), + ) + .parse(input)?; 
+ + let hier_part = UrlHeirPart { authority, path }; + + #[cfg(feature = "tracing")] + { + debug!(?input); + debug!(?heir_part); + } + + Ok((input, hier_part)) + } + + fn parse_authority(input: &'url str) -> IResult<&'url str, UrlAuthority<'url>> { + #[cfg(feature = "tracing")] + { + debug!("Parsing for Authority"); + } + + // Optional: username / token + let (input, userinfo) = Self::parse_userinfo(input)?; + + // Host + #[cfg(feature = "tracing")] + { + debug!("Looking ahead for windows-style path vs host"); + } + + // peek ahead to check for windows path stuff + let check = context( + "Host check for windows path", + peek(preceded( + take_while(|c| reg_name_uri_chars(c) && c != '\\'), + tag::<&str, &str, nom::error::Error<&str>>(":\\"), + )), + ) + .parse(input); + + if check.is_ok() { + #[cfg(feature = "tracing")] + { + debug!( + "Host check failed. Found potential windows-style path while looking for host" + ); + } + + return Ok((input, UrlAuthority::default())); + } + + #[cfg(feature = "tracing")] + { + debug!("Parsing for host"); + } + + let (input, host) = context( + "Host parser", + opt(verify( + recognize(take_while(|c: char| reg_name_uri_chars(c))), + |s: &str| { + let has_alphanum = s.chars().any(is_alphanum); + let starts_with_alphanum = s.chars().next().is_some_and(is_alphanum); + + has_alphanum && starts_with_alphanum && !s.is_empty() + }, + )), + ) + .parse(input)?; + + #[cfg(feature = "tracing")] + { + debug!("host found", ?host); + } + + // Optional: port + let (input, port) = Self::parse_port(input)?; + + let authority = UrlAuthority { + userinfo, + host, + port, + }; + + #[cfg(feature = "tracing")] + { + debug!(?input); + debug!(?authority); + } + + Ok((input, authority)) + } + + fn parse_userinfo(authority_input: &'url str) -> IResult<&'url str, UrlUserInfo<'url>> { + // Peek for username@ + #[cfg(feature = "tracing")] + { + debug!("Checking for for Userinfo"); + } + + let mut check = context( + "Userinfo validation", + peek(pair( + 
take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':'), + tag::<&str, &str, nom::error::Error<&str>>("@"), + )), + ); + + if check.parse(authority_input).is_err() { + #[cfg(feature = "tracing")] + { + debug!("Userinfo check failed"); + } + return Ok((authority_input, UrlUserInfo::default())); + } + + // Userinfo + let (authority_input, userinfo) = context( + "Userinfo parser", + opt(verify( + recognize(take_while(|c: char| { + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' + })), + |s: &str| !s.is_empty(), + )), + ) + .parse(authority_input)?; + + let (authority_input, _) = if userinfo.is_some() { + #[cfg(feature = "tracing")] + { + debug!("Userinfo found. Parsing for '@'"); + } + + context("Userinfo '@' parser", tag("@")).parse(authority_input)? + } else { + // No change to input, but let the compiler be happy + (authority_input, authority_input) + }; + + // Break down userinfo into user and token + let (user, token) = if let Some(userinfo) = userinfo { + if userinfo.contains(":") { + #[cfg(feature = "tracing")] + { + debug!("Continue break down userinfo into user:token"); + } + let (_, (user, token)) = context( + "Userinfo with colon parser", + separated_pair( + verify( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + |s: &str| !s.is_empty(), + ), + tag(":"), + verify( + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + |s: &str| !s.is_empty(), + ), + ), + ) + .parse(userinfo)?; + (Some(user), Some(token)) + } else { + (Some(userinfo), None) + } + } else { + (None, None) + }; + + let userinfo = UrlUserInfo { user, token }; + + #[cfg(feature = "tracing")] + { + debug!(?input); + debug!(?userinfo); + } + + Ok((authority_input, userinfo)) + } + + fn parse_port(authority_input: &'url str) -> IResult<&'url str, Option> { + #[cfg(feature = "tracing")] + { + debug!("Parsing port"); + } + + let (input, port) = context( + "Port parser", + opt(map_opt( + 
verify(preceded(tag(":"), digit1), |p_str: &str| !p_str.is_empty()), + |s: &str| s.parse::().ok(), + )), + ) + .parse(authority_input)?; + + #[cfg(feature = "tracing")] + { + debug!(?input); + debug!(?port); + } + + Ok((input, port)) + } + + // This will get absolute paths. + // todo: test for empty and start with "//" + fn path_abempty_parser( + ) -> impl Parser< + &'url str, + Output = > as Parser< + &'url str, + >>::Output, + Error = nom::error::Error<&'url str>, + >{ + #[cfg(feature = "tracing")] + { + debug!("parsing abempty path", ?path); + } + + // Starts with '/' or empty + context( + "Path parser (abempty)", + recognize(many1(pair( + tag("/"), + take_while(|c: char| pchar_uri_chars(c)), + ))), + ) + } + + fn path_ssh_parser( + ) -> impl Parser< + &'url str, + Output = > as Parser< + &'url str, + >>::Output, + Error = nom::error::Error<&'url str>, + >{ + #[cfg(feature = "tracing")] + { + debug!("Parsing ssh path", ?path); + } + + context( + "Path parser (ssh)", + recognize(( + tag(":"), + take_while(|c: char| pchar_uri_chars(c)), + many1(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )), + ) + } + + fn path_rootless_parser( + ) -> impl Parser< + &'url str, + Output = > as Parser< + &'url str, + >>::Output, + Error = nom::error::Error<&'url str>, + >{ + #[cfg(feature = "tracing")] + { + debug!("Parsing rootless path", ?path); + } + + context( + "Path parser (rootless)", + recognize(pair( + take_while(|c: char| pchar_uri_chars(c)), + many0(pair(tag("/"), take_while(|c: char| pchar_uri_chars(c)))), + )), + ) + } +} + +#[derive(Debug, Default, Clone, Copy, CopyGetters)] +#[getset(get_copy = "pub")] +pub(crate) struct UrlUserInfo<'url> { + pub(crate) user: Option<&'url str>, + pub(crate) token: Option<&'url str>, +} + +#[derive(Debug, Default, Clone, Copy, CopyGetters)] +#[getset(get_copy = "pub")] +pub(crate) struct UrlAuthority<'url> { + pub(crate) userinfo: UrlUserInfo<'url>, + pub(crate) host: Option<&'url str>, + pub(crate) port: Option, +} + 
+#[derive(Debug, Default, Clone, Copy, CopyGetters)] +#[getset(get_copy = "pub")] +pub(crate) struct UrlHeirPart<'url> { + pub(crate) authority: UrlAuthority<'url>, + pub(crate) path: &'url str, +} + +pub(crate) fn pchar_uri_chars(c: char) -> bool { + // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" + unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' +} + +pub(crate) fn reg_name_uri_chars(c: char) -> bool { + // *( unreserved / pct-encoded (not implemented) / sub-delims ) + unreserved_uri_chars(c) || subdelims_uri_chars(c) +} + +pub(crate) fn unreserved_uri_chars(c: char) -> bool { + is_alphanum(c) || c == '-' || c == '.' || c == '_' || c == '~' +} + +pub(crate) fn is_alphanum(c: char) -> bool { + c.is_ascii_alphabetic() || c.is_ascii_digit() +} + +pub(crate) fn subdelims_uri_chars(c: char) -> bool { + c == '!' + || c == '$' + || c == '&' + || c == '\'' + || c == '(' + || c == ')' + || c == '*' + || c == '+' + || c == ',' + || c == ';' + || c == '=' + || c == '\\' // This is not part of spec, but used for windows paths +} diff --git a/tests/parse.rs b/tests/parse.rs index c6d1543..3fc2055 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -247,53 +247,8 @@ fn absolute_windows_path() { assert_eq!(parsed.print_scheme(), false); } -//// Move test -////#[test] -////fn ssh_user_path_not_acctname_reponame_format() { -//// let test_url = "git@test.com:repo"; -//// let e = GitUrl::parse(test_url); -//// -//// assert!(e.is_err()); -//// assert_eq!( -//// format!("{}", e.err().unwrap()), -//// "Git Url not in expected format" -//// ); -////} -// -//// Move test -////#[test] -////fn ssh_without_organization() { -//// let test_url = "ssh://f589726c3611:29418/repo"; -//// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -//// let expected = GitUrl { -//// host: Some("f589726c3611".to_string()), -//// //name: "repo".to_string(), -//// //owner: Some("repo".to_string()), -//// //organization: None, -//// //fullname: 
"repo/repo".to_string(), -//// scheme: Some(Scheme::Ssh), -//// user: None, -//// token: None, -//// port: Some(29418), -//// path: "repo".to_string(), -//// //git_suffix: false, -//// //scheme_prefix: true, -//// print_scheme: true, -//// }; -//// -//// assert_eq!(parsed, expected); -////} -// -////#[test] -////fn empty_path() { -//// assert_eq!( -//// GitUrlParseError::EmptyPath, -//// GitUrl::parse("file://").unwrap_err() -//// ) -////} - #[test] -fn bad_port_number() { +fn bad_port_1() { let test_url = "https://github.com:crypto-browserify/browserify-rsa.git"; let e = GitUrl::parse(test_url); @@ -304,18 +259,46 @@ fn bad_port_number() { //); } -// This test might not have a use anymore if we're not expanding "git:" -> "git://" #[test] -fn git() { - let test_url = "git://github.com/owner/name.git"; - let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +fn bad_port_2() { + let test_url = "https://example.org:7z"; + let e = GitUrl::parse(test_url); - assert_eq!(parsed.to_string(), test_url); - assert_eq!(parsed.scheme(), Some("git")); - assert_eq!(parsed.user(), None); - assert_eq!(parsed.token(), None); - assert_eq!(parsed.host(), Some("github.com")); - assert_eq!(parsed.port(), None); - assert_eq!(parsed.path(), "/owner/name.git"); - assert_eq!(parsed.print_scheme(), true); + assert!(e.is_err()); + //assert_eq!( + // format!("{}", e.err().unwrap()), + // "Error from Url crate: invalid port number" + //); +} + +#[test] +fn port_out_of_range() { + let test_url = "https://example.org:70000"; + let e = GitUrl::parse(test_url); + + assert!(e.is_err()); +} + +#[test] +fn host_missing_1() { + let test_url = "https://:443"; + let e = GitUrl::parse(test_url); + + assert!(e.is_err()); +} + +#[test] +fn host_missing_2() { + let test_url = "https://user:pass@"; + let e = GitUrl::parse(test_url); + + assert!(e.is_err()); +} + +#[test] +fn host_invalid() { + let test_url = "foo://exa[mple.org"; + let e = GitUrl::parse(test_url); + + assert!(e.is_err()); } diff 
--git a/tests/provider.rs b/tests/provider.rs index e674771..5a8e49f 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -1,4 +1,7 @@ -use git_url_parse::*; +use git_url_parse::types::provider::{ + AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider, +}; +use git_url_parse::{GitUrl, GitUrlParseError}; // GitHub // https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository @@ -73,8 +76,8 @@ fn http_azure_devops() { let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: types::AzureDevOpsProvider = parsed.provider_info().unwrap(); - let expected = types::AzureDevOpsProvider { + let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); + let expected = AzureDevOpsProvider { org: "CompanyName", project: "ProjectName", repo: "RepoName", @@ -87,8 +90,8 @@ fn ssh_azure_devops() { let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: types::AzureDevOpsProvider = parsed.provider_info().unwrap(); - let expected = types::AzureDevOpsProvider { + let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); + let expected = AzureDevOpsProvider { org: "CompanyName", project: "ProjectName", repo: "RepoName", @@ -105,9 +108,9 @@ fn http_gitlab() { let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - user: "gitlab-org", + let provider_info: GitLabProvider = parsed.provider_info().unwrap(); + let expected = GitLabProvider { + owner: "gitlab-org", subgroup: None, repo: "gitlab", }; @@ -119,9 +122,9 @@ fn ssh_gitlab() { let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; let parsed 
= GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - user: "gitlab-org", + let provider_info: GitLabProvider = parsed.provider_info().unwrap(); + let expected = GitLabProvider { + owner: "gitlab-org", subgroup: None, repo: "gitlab", }; @@ -133,9 +136,9 @@ fn http_gitlab_subgroups() { let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - user: "gitlab-org", + let provider_info: GitLabProvider = parsed.provider_info().unwrap(); + let expected = GitLabProvider { + owner: "gitlab-org", subgroup: Some(vec!["sbom", "systems"]), repo: "gitlab-core", }; @@ -147,9 +150,9 @@ fn ssh_gitlab_subgroups() { let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - let provider_info: types::GitLabProvider = parsed.provider_info().unwrap(); - let expected = types::GitLabProvider { - user: "gitlab-org", + let provider_info: GitLabProvider = parsed.provider_info().unwrap(); + let expected = GitLabProvider { + owner: "gitlab-org", subgroup: Some(vec!["sbom", "systems"]), repo: "gitlab-core", }; @@ -166,3 +169,48 @@ fn filepath() { let provider_info: Result = parsed.provider_info(); assert!(provider_info.is_err()) } + +//// Move test +////#[test] +////fn ssh_user_path_not_acctname_reponame_format() { +//// let test_url = "git@test.com:repo"; +//// let e = GitUrl::parse(test_url); +//// +//// assert!(e.is_err()); +//// assert_eq!( +//// format!("{}", e.err().unwrap()), +//// "Git Url not in expected format" +//// ); +////} +// +//// Move test +////#[test] +////fn ssh_without_organization() { +//// let test_url = "ssh://f589726c3611:29418/repo"; +//// let parsed = 
GitUrl::parse(test_url).expect("URL parse failed"); +//// let expected = GitUrl { +//// host: Some("f589726c3611".to_string()), +//// //name: "repo".to_string(), +//// //owner: Some("repo".to_string()), +//// //organization: None, +//// //fullname: "repo/repo".to_string(), +//// scheme: Some(Scheme::Ssh), +//// user: None, +//// token: None, +//// port: Some(29418), +//// path: "repo".to_string(), +//// //git_suffix: false, +//// //scheme_prefix: true, +//// print_scheme: true, +//// }; +//// +//// assert_eq!(parsed, expected); +////} +// +////#[test] +////fn empty_path() { +//// assert_eq!( +//// GitUrlParseError::EmptyPath, +//// GitUrl::parse("file://").unwrap_err() +//// ) +////} diff --git a/tests/trim_auth.rs b/tests/trim_auth.rs index d29be60..73d4e5e 100644 --- a/tests/trim_auth.rs +++ b/tests/trim_auth.rs @@ -1,101 +1,101 @@ use git_url_parse::*; -//#[test] -//fn ssh_user_ports() { -// let test_url = "ssh://git@host.tld:9999/user/project-name.git"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "ssh://host.tld:9999/user/project-name.git"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//// Specific service support -//#[test] -//fn https_user_bitbucket() { -// let test_url = "https://user@bitbucket.org/user/repo.git"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "https://bitbucket.org/user/repo.git"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn ssh_user_bitbucket() { -// let test_url = "git@bitbucket.org:user/repo.git"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "bitbucket.org:user/repo.git"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn https_user_auth_bitbucket() { -// let test_url = 
"https://x-token-auth:token@bitbucket.org/owner/name.git/"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "https://bitbucket.org/owner/name.git/"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn https_user_github() { -// let test_url = "https://user@github.com/user/repo.git/"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "https://github.com/user/repo.git/"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn ssh_user_github() { -// let test_url = "git@github.com:user/repo.git"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "github.com:user/repo.git"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn https_user_auth_github() { -// let test_url = "https://token:x-oauth-basic@github.com/owner/name.git/"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "https://github.com/owner/name.git/"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn ssh_user_azure_devops() { -// let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; -// -// assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} -// -//#[test] -//fn https_user_azure_devops() { -// let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; -// let parsed_and_trimmed = GitUrl::parse(test_url) -// .expect("URL parse failed") -// .trim_auth(); -// let expected = "https://dev.azure.com/organization/project/_git/repo"; -// -// 
assert_eq!(format!("{}", parsed_and_trimmed), expected); -//} +#[test] +fn ssh_user_ports() { + let test_url = "ssh://git@host.tld:9999/user/project-name.git"; + let expected = "ssh://host.tld:9999/user/project-name.git"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +// Specific service support +#[test] +fn https_user_bitbucket() { + let test_url = "https://user@bitbucket.org/user/repo.git"; + let expected = "https://bitbucket.org/user/repo.git"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn ssh_user_bitbucket() { + let test_url = "git@bitbucket.org:user/repo.git"; + let expected = "bitbucket.org:user/repo.git"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn https_user_auth_bitbucket() { + let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git/"; + let expected = "https://bitbucket.org/owner/name.git/"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn https_user_github() { + let test_url = "https://user@github.com/user/repo.git/"; + let expected = "https://github.com/user/repo.git/"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn ssh_user_github() { + let test_url = "git@github.com:user/repo.git"; + let expected = "github.com:user/repo.git"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn https_user_auth_github() { + let test_url = 
"https://token:x-oauth-basic@github.com/owner/name.git/"; + let expected = "https://github.com/owner/name.git/"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn ssh_user_azure_devops() { + let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; + let expected = "ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} + +#[test] +fn https_user_azure_devops() { + let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; + let expected = "https://dev.azure.com/organization/project/_git/repo"; + + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + let trimmed = parsed.trim_auth(); + + assert_eq!(format!("{trimmed}"), expected); +} From e78d74c7e5b05babb992eebe47040bfc378c8679 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Mon, 8 Sep 2025 01:03:19 -0700 Subject: [PATCH 25/32] Updating GitUrlParseError --- src/types/error.rs | 42 +++++++++- src/types/mod.rs | 63 ++++++++++----- src/types/provider/mod.rs | 166 +++++++++++++++----------------------- src/types/spec.rs | 29 ++++--- tests/parse.rs | 30 ++++--- tests/provider.rs | 68 ++-------------- tests/trim_auth.rs | 18 ++--- 7 files changed, 195 insertions(+), 221 deletions(-) diff --git a/src/types/error.rs b/src/types/error.rs index 4efad89..cda0b3f 100644 --- a/src/types/error.rs +++ b/src/types/error.rs @@ -1,16 +1,50 @@ use thiserror::Error; -#[derive(Error, Debug)] +#[derive(Error, Debug, PartialEq, Eq)] pub enum GitUrlParseError { - #[error("Nom parsing error: {0}")] + #[error("Error from Url crate: {0}")] + UrlParseError(#[from] url::ParseError), + + #[error("Nom crate parsing error: {0}")] NomParseError(String), - #[error("Git Url not in expected format")] - UnexpectedFormat, + #[error("Git Url must have a path")] + InvalidPathEmpty, + + #[error("Invalid port number")] + InvalidPortNumber, + + #[error("Tokens only supported by httplike urls")] + InvalidTokenUnsupported, + + #[error("Filelike urls expect only scheme and/or path")] + InvalidFilePattern, #[error("Git Url not supported by provider")] ProviderUnsupported, #[error("Found null bytes within input url before parsing")] FoundNullBytes, + + #[error("Provider info parse failed: {0}")] + ProviderParseFail(String), + + #[error("Unexpected error occurred during parsing")] + UnexpectedError, +} + +impl<'a> From> for GitUrlParseError { + fn from(err: nom::Err<(&'a str, nom::error::ErrorKind)>) -> Self { + match err { + nom::Err::Error((input, kind)) => GitUrlParseError::NomParseError(format!( + "Parse error at: {}, kind: {:?}", + input, kind + )), + nom::Err::Failure((input, kind)) => GitUrlParseError::NomParseError(format!( + "Parse failure at: {}, kind: {:?}", + input, kind + )), + nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedError, + } + } } diff 
--git a/src/types/mod.rs b/src/types/mod.rs index d0f0f29..7dd64a9 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -35,10 +35,10 @@ pub struct GitUrl<'url> { host: Option<&'url str>, #[getset(get_copy = "pub")] port: Option, - #[getset(get_copy = "pub")] + #[getset(get_copy = "pub", set = "pub(crate)")] path: &'url str, /// Include scheme:// when printing url - #[getset(get_copy = "pub")] + #[getset(get_copy = "pub", set = "pub(crate)")] print_scheme: bool, #[getset(get_copy = "pub(crate)")] hint: GitUrlParseHint, @@ -47,7 +47,23 @@ pub struct GitUrl<'url> { /// Build the printable GitUrl from its components impl fmt::Display for GitUrl<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let scheme = if self.print_scheme() { + let git_url_str = self.display(); + + write!(f, "{git_url_str}",) + } +} + +impl<'url> GitUrl<'url> { + fn display(&self) -> String { + self.build_string(false) + } + + fn url_compat_display(&self) -> String { + self.build_string(true) + } + + fn build_string(&self, url_compat: bool) -> String { + let scheme = if self.print_scheme() || url_compat { if let Some(scheme) = self.scheme() { format!("{scheme}://") } else { @@ -77,14 +93,19 @@ impl fmt::Display for GitUrl<'_> { (GitUrlParseHint::Sshlike, Some(port), path) => { (format!(":{port}"), format!("/{path}")) } - (GitUrlParseHint::Sshlike, None, path) => (String::new(), format!(":{path}")), + (GitUrlParseHint::Sshlike, None, path) => { + if url_compat { + (String::new(), format!("/{path}")) + } else { + (String::new(), format!(":{path}")) + } + } (GitUrlParseHint::Filelike, None, path) => (String::new(), path.to_string()), _ => (String::new(), String::new()), }; let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); - - write!(f, "{git_url_str}",) + git_url_str } } @@ -178,11 +199,9 @@ impl<'url> GitUrl<'url> { hint, }; - if git_url.is_valid() { - Ok(git_url) - } else { - Err(GitUrlParseError::UnexpectedFormat) - } + let _check = git_url.is_valid()?; + + 
Ok(git_url) } pub fn provider_info(&self) -> Result @@ -192,15 +211,15 @@ impl<'url> GitUrl<'url> { T::from_git_url(self) } - fn is_valid(&self) -> bool { + fn is_valid(&self) -> Result<(), GitUrlParseError> { // Last chance validation - //if url::Url::parse(&self.to_string()).is_err() { - // return false - //} - //println!("{self:#?}"); + if self.path().is_empty() { + return Err(GitUrlParseError::InvalidPathEmpty); + } + // There's an edge case we don't cover: ssh urls using ports + absolute paths // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it @@ -214,7 +233,7 @@ impl<'url> GitUrl<'url> { debug!("path starts with ':'?", self.path.starts_with(':')); } - return false; + return Err(GitUrlParseError::InvalidPortNumber); } // if we are not httplike, we shouldn't have tokens @@ -224,7 +243,7 @@ impl<'url> GitUrl<'url> { debug!("{}", self.hint()); debug!("Token support only for httplike url", self.token()); } - return false; + return Err(GitUrlParseError::InvalidTokenUnsupported); } // if we are filelike, we should only have paths @@ -242,9 +261,13 @@ impl<'url> GitUrl<'url> { ?self ); } - return false; + return Err(GitUrlParseError::InvalidFilePattern); } - true + // Since we don't fully implement any spec, we'll rely on the url crate + println!("{:#?}", self.url_compat_display()); + let _u = url::Url::parse(&self.url_compat_display())?; + + Ok(()) } } diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index f0001fe..1ed948f 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -2,10 +2,10 @@ use crate::types::GitUrlParseHint; use crate::{GitUrl, GitUrlParseError}; use getset::{CloneGetters, CopyGetters}; +use nom::Parser; use nom::bytes::complete::{is_not, tag, take_until}; use nom::combinator::opt; use nom::sequence::{preceded, separated_pair, terminated}; -use nom::{IResult, Parser}; pub trait GitProvider: Clone + std::fmt::Debug { fn from_git_url(url: &T) -> Result; @@ -20,19 
+20,10 @@ pub struct GenericProvider<'a> { impl<'a> GenericProvider<'a> { fn parse_path(input: &str) -> Result<(&str, GenericProvider), GitUrlParseError> { - let parse_result = || -> IResult<&str, GenericProvider> { - let (input, _) = opt(tag("/")).parse(input)?; - let (input, (user, repo)) = - separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input)?; - Ok((input, GenericProvider { owner: user, repo })) - }; - - parse_result().map_err(|e| match e { - nom::Err::Error(err) | nom::Err::Failure(err) => { - GitUrlParseError::NomParseError(err.to_string()) - } - nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, - }) + let (input, _) = opt(tag("/")).parse(input)?; + let (input, (user, repo)) = + separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input)?; + Ok((input, GenericProvider { owner: user, repo })) } pub fn fullname(&self) -> String { @@ -61,59 +52,42 @@ pub struct AzureDevOpsProvider<'a> { impl<'a> AzureDevOpsProvider<'a> { fn parse_http_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { - let parse_result = || -> IResult<&str, AzureDevOpsProvider> { - // Handle optional leading / - let (input, _) = opt(tag("/")).parse(input)?; - - // Parse org/project/repo - let (input, (org, (project, repo))) = separated_pair( + // Handle optional leading / + let (input, _) = opt(tag("/")).parse(input)?; + + // Parse org/project/repo + let (input, (org, (project, repo))) = separated_pair( + is_not("/"), + tag("/"), + separated_pair( is_not("/"), tag("/"), - separated_pair( - is_not("/"), - tag("/"), - preceded(opt(tag("_git/")), is_not("")), - ), - ) - .parse(input)?; - - Ok((input, AzureDevOpsProvider { org, project, repo })) - }; + preceded(opt(tag("_git/")), is_not("")), + ), + ) + .parse(input)?; - parse_result().map_err(|e| match e { - nom::Err::Error(err) | nom::Err::Failure(err) => { - GitUrlParseError::NomParseError(err.to_string()) - } - nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, - 
}) + Ok((input, AzureDevOpsProvider { org, project, repo })) } - fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { - let parse_result = || -> IResult<&str, AzureDevOpsProvider> { - // Handle optional leading v3/ or other prefix - let (input, _) = opt(take_until("/")).parse(input)?; - let (input, _) = opt(tag("/")).parse(input)?; - // Parse org/project/repo - let (input, (org, (project, repo))) = separated_pair( + fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { + // Handle optional leading v3/ or other prefix + let (input, _) = opt(take_until("/")).parse(input)?; + let (input, _) = opt(tag("/")).parse(input)?; + + // Parse org/project/repo + let (input, (org, (project, repo))) = separated_pair( + is_not("/"), + tag("/"), + separated_pair( is_not("/"), tag("/"), - separated_pair( - is_not("/"), - tag("/"), - terminated(is_not("."), opt(tag(".git"))), - ), - ) - .parse(input)?; - - Ok((input, AzureDevOpsProvider { org, project, repo })) - }; + terminated(is_not("."), opt(tag(".git"))), + ), + ) + .parse(input)?; - parse_result().map_err(|e| match e { - nom::Err::Error(err) | nom::Err::Failure(err) => { - GitUrlParseError::NomParseError(err.to_string()) - } - nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, - }) + Ok((input, AzureDevOpsProvider { org, project, repo })) } } @@ -143,50 +117,40 @@ pub struct GitLabProvider<'a> { impl<'a> GitLabProvider<'a> { fn parse_path(input: &str) -> Result<(&str, GitLabProvider), GitUrlParseError> { - let parse_result = || -> IResult<&str, GitLabProvider> { - // Optional leading slash - let (input, _) = opt(tag("/")).parse(input)?; - - // Remove .git extension if present - let input = input.trim_end_matches(".git"); - - // Split the path - let parts: Vec<&str> = input.split('/').filter(|s| !s.is_empty()).collect(); - - // Ensure we have at least 2 parts (owner and repo) - if parts.len() < 2 { - return 
Err(nom::Err::Error(nom::error::Error::new( - input, - nom::error::ErrorKind::Fail, - ))); - } - - // Last part is the repo - let repo = parts[parts.len() - 1]; - - // Everything before the last part is the owner/subgroups - let (owner, subgroup) = if parts.len() > 2 { - (parts[0], Some(parts[1..parts.len() - 1].to_vec())) - } else { - (parts[0], None) - }; - - Ok(( - input, - GitLabProvider { - owner, - subgroup, - repo, - }, - )) + // Optional leading slash + let (input, _) = opt(tag("/")).parse(input)?; + + // Remove .git extension if present + let input = input.trim_end_matches(".git"); + + // Split the path + let parts: Vec<&str> = input.split('/').filter(|s| !s.is_empty()).collect(); + + // Ensure we have at least 2 parts (owner and repo) + if parts.len() < 2 { + return Err(GitUrlParseError::ProviderParseFail( + "Path needs at least 2 parts: ex. \'/owner/repo\'".into(), + )); + } + + // Last part is the repo + let repo = parts[parts.len() - 1]; + + // Everything before the last part is the owner/subgroups + let (owner, subgroup) = if parts.len() > 2 { + (parts[0], Some(parts[1..parts.len() - 1].to_vec())) + } else { + (parts[0], None) }; - parse_result().map_err(|e| match e { - nom::Err::Error(err) | nom::Err::Failure(err) => { - GitUrlParseError::NomParseError(err.to_string()) - } - nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedFormat, - }) + Ok(( + input, + GitLabProvider { + owner, + subgroup, + repo, + }, + )) } } diff --git a/src/types/spec.rs b/src/types/spec.rs index 791bc4c..6f831e5 100644 --- a/src/types/spec.rs +++ b/src/types/spec.rs @@ -2,7 +2,7 @@ use getset::CopyGetters; use nom::Finish; use nom::branch::alt; use nom::bytes::complete::{tag, take_while}; -use nom::character::complete::{alpha1, digit1}; +use nom::character::complete::alpha1; use nom::combinator::{map_opt, peek, recognize, verify}; use nom::error::context; use nom::multi::{many0, many1}; @@ -17,15 +17,15 @@ pub(crate) struct UrlSpecParser<'url> { } impl<'url> 
UrlSpecParser<'url> { - // https://datatracker.ietf.org/doc/html/rfc3986 - // Based on rfc3986, but does not strictly cover the spec - // * No support for: - // * query, fragment, percent-encoding, and much of the edges for path support - // * many forms of ip representations like ipv6, hexdigits - // * Added support for: - // * parsing ssh git urls which use ":" as a delimiter between the authority and path - // * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) - // * some limited support for windows/linux filepaths + /// https://datatracker.ietf.org/doc/html/rfc3986 + /// Based on rfc3986, but does not strictly cover the spec + /// * No support for: + /// * query, fragment, percent-encoding, and much of the edges for path support + /// * many forms of ip representations like ipv6, hexdigits + /// * Added support for: + /// * parsing ssh git urls which use ":" as a delimiter between the authority and path + /// * parsing userinfo into user:token (but its officially deprecated, per #section-3.2.1) + /// * some limited support for windows/linux filepaths pub(crate) fn parse(input: &'url str) -> IResult<&'url str, Self> { let (input, scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); let (input, heir_part) = Self::parse_hier_part(input).finish().unwrap_or_default(); @@ -313,10 +313,17 @@ impl<'url> UrlSpecParser<'url> { debug!("Parsing port"); } + // We need to pull the full value of what's in the segment THEN parse for numbers let (input, port) = context( "Port parser", opt(map_opt( - verify(preceded(tag(":"), digit1), |p_str: &str| !p_str.is_empty()), + verify( + preceded( + tag(":"), + take_while(|c: char| unreserved_uri_chars(c) || subdelims_uri_chars(c)), + ), + |p_str: &str| !p_str.is_empty(), + ), |s: &str| s.parse::().ok(), )), ) diff --git a/tests/parse.rs b/tests/parse.rs index 3fc2055..1ada913 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -212,7 +212,6 @@ fn absolute_unix_path() { 
assert_eq!(parsed.print_scheme(), false); } -// Issue #6 - Relative Windows paths will parse into Unix paths #[test] fn relative_windows_path() { let test_url = r"..\project-name.git"; @@ -229,9 +228,6 @@ fn relative_windows_path() { assert_eq!(parsed.print_scheme(), false); } -// Can I use `typed-path` to deal with this? -// Issue #7 - Absolute Windows paths will not parse at all -//#[should_panic(expected = "URL parse failed: UnexpectedFormat")] #[test] fn absolute_windows_path() { let test_url = r"c:\project-name.git"; @@ -253,10 +249,9 @@ fn bad_port_1() { let e = GitUrl::parse(test_url); assert!(e.is_err()); - //assert_eq!( - // format!("{}", e.err().unwrap()), - // "Error from Url crate: invalid port number" - //); + if let Err(err) = e { + assert_eq!(err, GitUrlParseError::InvalidPortNumber) + } } #[test] @@ -265,10 +260,9 @@ fn bad_port_2() { let e = GitUrl::parse(test_url); assert!(e.is_err()); - //assert_eq!( - // format!("{}", e.err().unwrap()), - // "Error from Url crate: invalid port number" - //); + if let Err(err) = e { + assert_eq!(err, GitUrlParseError::InvalidPortNumber) + } } #[test] @@ -277,6 +271,9 @@ fn port_out_of_range() { let e = GitUrl::parse(test_url); assert!(e.is_err()); + if let Err(err) = e { + assert_eq!(err, GitUrlParseError::InvalidPortNumber) + } } #[test] @@ -285,6 +282,9 @@ fn host_missing_1() { let e = GitUrl::parse(test_url); assert!(e.is_err()); + if let Err(err) = e { + assert_eq!(err, GitUrlParseError::InvalidPathEmpty) + } } #[test] @@ -293,11 +293,15 @@ fn host_missing_2() { let e = GitUrl::parse(test_url); assert!(e.is_err()); + if let Err(err) = e { + assert_eq!(err, GitUrlParseError::InvalidPathEmpty) + } } +// FIXME: This test does not throw the correct error #[test] fn host_invalid() { - let test_url = "foo://exa[mple.org"; + let test_url = "foo://exa[mple.org/owner/repo.git"; let e = GitUrl::parse(test_url); assert!(e.is_err()); diff --git a/tests/provider.rs b/tests/provider.rs index 5a8e49f..b83fa2d 100644 --- 
a/tests/provider.rs +++ b/tests/provider.rs @@ -3,13 +3,6 @@ use git_url_parse::types::provider::{ }; use git_url_parse::{GitUrl, GitUrlParseError}; -// GitHub -// https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository -// BitBucket -// https://confluence.atlassian.com/bitbucketserver/clone-a-repository-790632786.html -// Codeberg -// https://codeberg.org/explore/repos - #[test] fn http_generic_git() { let test_url = "https://github.com/tjtelan/git-url-parse-rs.git"; @@ -67,10 +60,6 @@ fn self_host() { assert_eq!(provider_info, expected) } -// Azure Devops -// https://learn.microsoft.com/en-us/azure/devops/repos/git/clone?view=azure-devops&tabs=visual-studio-2022 -// https://learn.microsoft.com/en-us/azure/devops/release-notes/2018/sep-10-azure-devops-launch#administration -//vec!["dev.azure.com", "ssh.dev.azure.com", "visualstudio.com"]; #[test] fn http_azure_devops() { let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; @@ -99,10 +88,6 @@ fn ssh_azure_devops() { assert_eq!(provider_info, expected) } -// GitLab -// https://docs.gitlab.com/topics/git/clone/#clone-with-ssh -// https://gitlab.com/explore/projects/trending?sort=latest_activity_desc -// https://gitlab.com/redhat/red-hat-ci-tools/kernel #[test] fn http_gitlab() { let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; @@ -164,53 +149,10 @@ fn filepath() { let test_url = "file:///home/user/Documents/"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - //assert!(parsed.provider_info::().is_ok()); - let provider_info: Result = parsed.provider_info(); - assert!(provider_info.is_err()) -} -//// Move test -////#[test] -////fn ssh_user_path_not_acctname_reponame_format() { -//// let test_url = "git@test.com:repo"; -//// let e = GitUrl::parse(test_url); -//// -//// assert!(e.is_err()); -//// assert_eq!( -//// format!("{}", e.err().unwrap()), -//// "Git Url not in expected format" -//// ); -////} -// -//// Move 
test -////#[test] -////fn ssh_without_organization() { -//// let test_url = "ssh://f589726c3611:29418/repo"; -//// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -//// let expected = GitUrl { -//// host: Some("f589726c3611".to_string()), -//// //name: "repo".to_string(), -//// //owner: Some("repo".to_string()), -//// //organization: None, -//// //fullname: "repo/repo".to_string(), -//// scheme: Some(Scheme::Ssh), -//// user: None, -//// token: None, -//// port: Some(29418), -//// path: "repo".to_string(), -//// //git_suffix: false, -//// //scheme_prefix: true, -//// print_scheme: true, -//// }; -//// -//// assert_eq!(parsed, expected); -////} -// -////#[test] -////fn empty_path() { -//// assert_eq!( -//// GitUrlParseError::EmptyPath, -//// GitUrl::parse("file://").unwrap_err() -//// ) -////} + assert!(provider_info.is_err()); + if let Err(e) = provider_info { + assert_eq!(e, GitUrlParseError::ProviderUnsupported) + } +} diff --git a/tests/trim_auth.rs b/tests/trim_auth.rs index 73d4e5e..ed82d6b 100644 --- a/tests/trim_auth.rs +++ b/tests/trim_auth.rs @@ -8,7 +8,7 @@ fn ssh_user_ports() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } // Specific service support @@ -20,7 +20,7 @@ fn https_user_bitbucket() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -31,7 +31,7 @@ fn ssh_user_bitbucket() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -42,7 +42,7 @@ fn https_user_auth_bitbucket() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - 
assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -53,7 +53,7 @@ fn https_user_github() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -64,7 +64,7 @@ fn ssh_user_github() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -75,7 +75,7 @@ fn https_user_auth_github() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -86,7 +86,7 @@ fn ssh_user_azure_devops() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } #[test] @@ -97,5 +97,5 @@ fn https_user_azure_devops() { let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); - assert_eq!(format!("{trimmed}"), expected); + assert_eq!(trimmed.to_string(), expected); } From 6d4cbcf0319937ae96d0d934518019b6624003aa Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Thu, 11 Sep 2025 23:32:46 -0700 Subject: [PATCH 26/32] Update logging and docs for release * cargo-rdme for readme * Added serde and url as optional --- Cargo.toml | 29 ++++--- README.md | 166 ++++++++++++++++++++---------------- src/lib.rs | 97 ++++++++++++++++++++- src/types/error.rs | 36 +++++--- src/types/mod.rs | 142 +++++++++++++++++++++---------- src/types/provider/mod.rs | 172 ++++++++++++++++++++++++++++++++++++-- src/types/spec.rs | 131 ++++++++++++++++++----------- tests/parse.rs | 91 ++++++++++++++------ tests/provider.rs | 170 ++++++++++++++++++++++++++----------- tests/trim_auth.rs | 28 +++++++ 10 files changed, 787 insertions(+), 275 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 05e70b2..79e46ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ categories = ["parser-implementations"] description = "A parser for git repo urls" documentation = "https://docs.rs/git-url-parse" edition = "2024" -keywords = ["git", "url", "parsing"] +keywords = ["git", "url", "parser"] license = "MIT" name = "git-url-parse" readme = "README.md" @@ -13,23 +13,22 @@ version = "0.4.6" rust-version = "1.82" [features] -default = [] -tracing = ["dep:tracing"] -#filepath -#validate -#provider +default = ["url"] +# Enable serde +serde = ["dep:serde"] +# Enable debugging logging with `log` crate +log = ["dep:log"] +# Enable url parsing validation with `url` crate +url = ["dep:url"] [dependencies] -tracing = { version = "0.1", optional = true } nom = "8" -thiserror = "2" getset = "0.1.6" -url = { version = "2.5" } -#strum = { version = "0.27", features = ["derive"] } - -#derive_builder = "0.20" -#typed-path = "0.11.0" +thiserror = "2" +serde = { version = "1", features = ["derive"], optional = true } +log = { version = "0.4", optional = true } +url = { version = "2.5", optional = true } [dev-dependencies] -env_logger = "0.11" -regex = "1.10" +env_logger = "*" +log = "*" diff --git a/README.md b/README.md index 5b5b31f..0558388 100644 --- a/README.md 
+++ b/README.md @@ -1,95 +1,117 @@ -# git-url-parse - [![Crates.io](https://img.shields.io/crates/v/git-url-parse)](https://crates.io/crates/git-url-parse) ![Crates.io MSRV](https://img.shields.io/crates/msrv/git-url-parse?label=rust-version) +[![Crates.io Total Downloads](https://img.shields.io/crates/d/git-url-parse?label=crates.io)](https://crates.io/crates/git-url-parse) [![Github actions CI status](https://github.com/tjtelan/git-url-parse-rs/actions/workflows/ci.yml/badge.svg)](https://github.com/tjtelan/git-url-parse-rs/actions/workflows/ci.yml) [![docs.rs](https://docs.rs/git-url-parse/badge.svg)](https://docs.rs/git-url-parse/) [![License](https://img.shields.io/github/license/tjtelan/git-url-parse-rs)](LICENSE) ![Maintenance](https://img.shields.io/maintenance/passively-maintained/2025) -Supports common protocols as specified by the [Pro Git book](https://git-scm.com/book/en/v2) +--- -See: [4.1 Git on the Server - The Protocols](https://git-scm.com/book/en/v2/Git-on-the-Server-The-Protocols) + -Supports parsing SSH/HTTPS repo urls for: -* Github -* Bitbucket -* Azure Devops +# Git Url Parse -See [tests/parse.rs](tests/parse.rs) for expected output for a variety of inputs. +Parses url used by git (e.g. 
`git clone <url>`) ---- +## Features + +- 🔍 Parses `git clone` compatible urls into [`GitUrl`](https://docs.rs/git-url-parse/latest/git_url_parse/types/struct.GitUrl.html) + - Supports multiple Git URL schemes (SSH, HTTP, HTTPS, File) + - Inspired by [RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986) with adaptations to support Git urls + +- 🏗️ Host provider info extraction + - Easy to implement trait [`GitProvider`](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/trait.GitProvider.html) for custom provider parsing + - Built-in support for multiple Git hosting providers + * [Generic](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.GenericProvider.html) (`git@host:owner/repo.git` style urls) + * [GitLab](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.GitLabProvider.html) + * [Azure DevOps](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.AzureDevOpsProvider.html) + +## Quick Example + +```rust +use git_url_parse::{GitUrl, GitUrlParseError}; +use git_url_parse::types::provider::GitProvider; +use git_url_parse::types::provider::GenericProvider; + +fn main() -> Result<(), git_url_parse::GitUrlParseError> { + let http_url = GitUrl::parse("https://github.com/tjtelan/git-url-parse-rs.git")?; + + // Extract basic URL components + assert_eq!(http_url.host(), Some("github.com")); + assert_eq!(http_url.path(), "/tjtelan/git-url-parse-rs.git"); + + // Support ssh-based urls as well + let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?; + + assert_eq!(ssh_url.scheme(), Some("ssh")); + assert_eq!(ssh_url.host(), Some("github.com")); + assert_eq!(ssh_url.path(), "tjtelan/git-url-parse-rs.git"); + + // Extract provider-specific information + // Built-in support for Github (Generic), Gitlab, Azure Devops style urls + let provider : GenericProvider = ssh_url.provider_info()?; + assert_eq!(provider.owner(), "tjtelan"); + assert_eq!(provider.repo(),
"git-url-parse-rs"); + + // Implement your own provider + #[derive(Debug, Clone, PartialEq, Eq)] + struct CustomProvider; + + impl GitProvider, GitUrlParseError> for CustomProvider { + fn from_git_url(_url: &GitUrl) -> Result { + // Your custom provider parsing here + Ok(Self) + } + } + + let custom_provider: CustomProvider = ssh_url.provider_info()?; + let expected = CustomProvider; + assert_eq!(custom_provider, expected); + + Ok(()) +} +``` -URLs that use the `ssh://` protocol (implicitly or explicitly) undergo a small normalization process in order to be parsed. +## Limitations -Internally uses `Url::parse()` from the [Url](https://crates.io/crates/url) crate after normalization. + Intended only for git repo urls. Url spec [RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986) is not fully implemented. -## Examples +- No support for: + - Query parameters + - Fragment identifiers + - Percent-encoding + - Complex IP address formats -### Run example with debug output +## Install ```shell -$ RUST_LOG=git_url_parse cargo run --example multi -$ RUST_LOG=git_url_parse cargo run --example trim_auth +cargo add git-url-parse ``` -### Simple usage and output +### Cargo Features -```bash -$ cargo run --example readme -``` +#### `log` +Enable for internal `debug!` output from [log](https://docs.rs/log/latest) +#### `serde` +Enable for [serde](https://docs.rs/serde/latest/) `Serialize`/`Deserialize` on [`GitUrl`](https://docs.rs/git-url-parse/latest/git_url_parse/types/struct.GitUrl.html) +#### `url` +(**enabled by default**) -```rust -use git_url_parse::GitUrl; +Uses [url](https://docs.rs/url/latest/) during parsing for full url validation -fn main() { - println!("SSH: {:#?}", GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")); - println!("HTTPS: {:#?}", GitUrl::parse("https://github.com/tjtelan/git-url-parse-rs")); -} -``` + + +## Migration from 0.4.x and earlier + +This crate was one of my first serious projects in Rust. 
Because I was still learning, it had some maintenance problems and was a bit awkward to use. In version 0.5, I rewrote most of it to fix those issues. + +The [`GitUrl`](https://docs.rs/git-url-parse/latest/git_url_parse/types/struct.GitUrl.html) struct is only meant to handle parsing urls used by `git`, which the [url](https://docs.rs/url/latest/url) crate doesn't handle. The recent updates make it so the input string is parsed and internally stored into a simple string slice (`&str`). And, instead of exposing all the internal fields of the struct, those details are hidden, and we use methods to interact with it. + +The [`GitProvider`](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/trait.GitProvider.html) trait helps extract common pieces of information that are often found in different url patterns using the [`GitUrl::provider_info`](https://docs.rs/git-url-parse/latest/git_url_parse/types/struct.GitUrl.html#method.provider_info) method. Several example provider parsers are included to show how this works. The result of [`GitUrl::parse`](https://docs.rs/git-url-parse/latest/git_url_parse/types/struct.GitUrl.html#method.parse) is more straightforward to use, but the internal details are hidden, and working with provider-specific information at the git host level is more specialized. + +The most common pattern for git url paths, like `/owner/repo.git`, is handled by [`GenericProvider`](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.GenericProvider.html). + +There's also [`AzureDevOpsProvider`](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.AzureDevOpsProvider.html), which is designed for Azure DevOps urls that follow the `org`, `project`, `repo` pattern. 
-### Example Output -```bash -SSH: Ok( - GitUrl { - host: Some( - "github.com", - ), - name: "git-url-parse-rs", - owner: Some( - "tjtelan", - ), - organization: None, - fullname: "tjtelan/git-url-parse-rs", - scheme: Ssh, - user: Some( - "git", - ), - token: None, - port: None, - path: "tjtelan/git-url-parse-rs.git", - git_suffix: true, - scheme_prefix: false, - }, -) -HTTPS: Ok( - GitUrl { - host: Some( - "github.com", - ), - name: "git-url-parse-rs", - owner: Some( - "tjtelan", - ), - organization: None, - fullname: "tjtelan/git-url-parse-rs", - scheme: Https, - user: None, - token: None, - port: None, - path: "/tjtelan/git-url-parse-rs", - git_suffix: false, - scheme_prefix: true, - }, -) -``` \ No newline at end of file +Finally, there's a new supported provider called [`GitLabProvider`](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.GitLabProvider.html), which is for GitLab urls. It supports the common `owner/repo` pattern shared with [`GenericProvider`](https://docs.rs/git-url-parse/latest/git_url_parse/types/provider/struct.GenericProvider.html), and also handles GitLab’s subgroups. diff --git a/src/lib.rs b/src/lib.rs index 1c40aec..2386dee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,99 @@ +#![deny(missing_docs)] +#![deny(clippy::missing_docs_in_private_items)] +#![allow(rustdoc::redundant_explicit_links)] // for cargo-rdme + +//! # Git Url Parse +//! +//! Parses url used by git (e.g. `git clone <url>`) +//! +//! ## Features +//! +//! - 🔍 Parses `git clone` compatible urls into [`GitUrl`](crate::types::GitUrl) +//! - Supports multiple Git URL schemes (SSH, HTTP, HTTPS, File) +//! - Inspired by [RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986) with adaptations to support Git urls +//! +//! - 🏗️ Host provider info extraction +//! - Easy to implement trait [`GitProvider`](crate::types::provider::GitProvider) for custom provider parsing +//! - Built-in support for multiple Git hosting providers +//!
* [Generic](crate::types::provider::GenericProvider) (`git@host:owner/repo.git` style urls) +//! * [GitLab](crate::types::provider::GitLabProvider) +//! * [Azure DevOps](crate::types::provider::AzureDevOpsProvider) +//! +//! ## Quick Example +//! +//! ```rust +//! use git_url_parse::{GitUrl, GitUrlParseError}; +//! use git_url_parse::types::provider::GitProvider; +//! use git_url_parse::types::provider::GenericProvider; +//! +//! fn main() -> Result<(), git_url_parse::GitUrlParseError> { +//! let http_url = GitUrl::parse("https://github.com/tjtelan/git-url-parse-rs.git")?; +//! +//! // Extract basic URL components +//! assert_eq!(http_url.host(), Some("github.com")); +//! assert_eq!(http_url.path(), "/tjtelan/git-url-parse-rs.git"); +//! +//! // Support ssh-based urls as well +//! let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?; +//! +//! assert_eq!(ssh_url.scheme(), Some("ssh")); +//! assert_eq!(ssh_url.host(), Some("github.com")); +//! assert_eq!(ssh_url.path(), "tjtelan/git-url-parse-rs.git"); +//! +//! // Extract provider-specific information +//! // Built-in support for Github (Generic), Gitlab, Azure Devops style urls +//! let provider : GenericProvider = ssh_url.provider_info()?; +//! assert_eq!(provider.owner(), "tjtelan"); +//! assert_eq!(provider.repo(), "git-url-parse-rs"); +//! +//! // Implement your own provider +//! #[derive(Debug, Clone, PartialEq, Eq)] +//! struct CustomProvider; +//! +//! impl GitProvider, GitUrlParseError> for CustomProvider { +//! fn from_git_url(_url: &GitUrl) -> Result { +//! // Your custom provider parsing here +//! Ok(Self) +//! } +//! } +//! +//! let custom_provider: CustomProvider = ssh_url.provider_info()?; +//! let expected = CustomProvider; +//! assert_eq!(custom_provider, expected); +//! +//! Ok(()) +//! } +//! ``` +//! +//! ## Limitations +//! +//! Intended only for git repo urls. Url spec [RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986) is not fully implemented. +//! +//! 
- No support for: +//! - Query parameters +//! - Fragment identifiers +//! - Percent-encoding +//! - Complex IP address formats +//! +//! ## Install +//! +//! ```shell +//! cargo add git-url-parse +//! ``` +//! +//! ### Cargo Features +//! +//! #### `log` +//! Enable for internal `debug!` output from [log](https://docs.rs/log/latest) +//! #### `serde` +//! Enable for [serde](https://docs.rs/serde/latest/) `Serialize`/`Deserialize` on [`GitUrl`](crate::types::GitUrl) +//! #### `url` +//! (**enabled by default**) +//! +//! Uses [url](https://docs.rs/url/latest/) during parsing for full url validation +//! + pub mod types; -// Re-exports +/// Re-exports pub use types::{GitUrl, GitUrlParseError}; diff --git a/src/types/error.rs b/src/types/error.rs index cda0b3f..7d24608 100644 --- a/src/types/error.rs +++ b/src/types/error.rs @@ -1,34 +1,50 @@ +//! # GitUrl error handling +//! +//! Error struct to use as Err for parsing Git urls + use thiserror::Error; +/// Internal error type for `GitUrl` for parsing errors #[derive(Error, Debug, PartialEq, Eq)] pub enum GitUrlParseError { + #[cfg(feature = "url")] + /// Error originating from the `url` crate during validation #[error("Error from Url crate: {0}")] UrlParseError(#[from] url::ParseError), + /// Parsing error converted from `nom` crate #[error("Nom crate parsing error: {0}")] NomParseError(String), + /// Git url must contain a non-empty path #[error("Git Url must have a path")] InvalidPathEmpty, + /// Invalid port number detected #[error("Invalid port number")] InvalidPortNumber, - #[error("Tokens only supported by httplike urls")] - InvalidTokenUnsupported, + /// Passwords are only supported in HTTP-like urls + #[error("Password only supported by httplike urls")] + InvalidPasswordUnsupported, + /// File-like url must follow filesystem path patterns #[error("Filelike urls expect only scheme and/or path")] InvalidFilePattern, - #[error("Git Url not supported by provider")] + /// `GitUrl` not supported by the
[`GitProvider`](crate::types::provider::GitProvider) + #[error("GitUrl not supported by provider")] ProviderUnsupported, + /// Detected null bytes in the input url #[error("Found null bytes within input url before parsing")] FoundNullBytes, + /// Failed to extract provider-specific info from url #[error("Provider info parse failed: {0}")] ProviderParseFail(String), + /// Catch-all error for unexpected failures during parsing #[error("Unexpected error occurred during parsing")] UnexpectedError, } @@ -36,14 +52,12 @@ pub enum GitUrlParseError { impl<'a> From> for GitUrlParseError { fn from(err: nom::Err<(&'a str, nom::error::ErrorKind)>) -> Self { match err { - nom::Err::Error((input, kind)) => GitUrlParseError::NomParseError(format!( - "Parse error at: {}, kind: {:?}", - input, kind - )), - nom::Err::Failure((input, kind)) => GitUrlParseError::NomParseError(format!( - "Parse failure at: {}, kind: {:?}", - input, kind - )), + nom::Err::Error((input, kind)) => { + GitUrlParseError::NomParseError(format!("Parse error at: {input}, kind: {kind:?}",)) + } + nom::Err::Failure((input, kind)) => { + GitUrlParseError::NomParseError(format!("Parse error at: {input}, kind: {kind:?}",)) + } nom::Err::Incomplete(_) => GitUrlParseError::UnexpectedError, } } diff --git a/src/types/mod.rs b/src/types/mod.rs index 7dd64a9..bebf9ca 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,3 +1,8 @@ +//! # GitUrl internal types +//! +//! Internal types and parsing logic for Git urls +//! + mod error; mod spec; use spec::*; @@ -9,37 +14,58 @@ use core::str; use std::fmt; use getset::{CloneGetters, CopyGetters, Setters}; +#[cfg(feature = "log")] +use log::debug; use nom::Finish; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; -#[cfg(feature = "tracing")] -use tracing::debug; - +/// Assigned as a label during parsing for different Git URL types. +/// Some printing or `GitProvider` parsing behavior are influenced by this type. 
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub(crate) enum GitUrlParseHint { + /// The default status #[default] Unknown, + /// When `ssh` is in the scheme, or a `:` is used as initial path separator Sshlike, + /// When `file` is in scheme, or filesystem-like relative paths Filelike, + /// Default network scheme if not `ssh`. If `:` is used as initial path separator in the userinfo Httplike, } +/// Represents a parsed Git repository url +/// +/// GitUrl is an input url used by git. +/// Parsing of the url inspired by rfc3986, but does not strictly cover the spec +/// Optional, but by default, uses the `url` crate to perform a final validation of the parsing effort #[derive(Clone, CopyGetters, CloneGetters, Debug, Default, Setters, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct GitUrl<'url> { + /// scheme name (i.e. `scheme://`) #[getset(get_copy = "pub", set = "pub(crate)")] scheme: Option<&'url str>, + /// user name userinfo #[getset(get_copy = "pub", set = "pub(crate)")] user: Option<&'url str>, + /// password userinfo provided with `user` (i.e. `user`:`password`@...) 
#[getset(get_copy = "pub", set = "pub(crate)")] - token: Option<&'url str>, + password: Option<&'url str>, + /// The hostname or IP of the repo host #[getset(get_copy = "pub")] host: Option<&'url str>, + /// The port number of the repo host, if specified #[getset(get_copy = "pub")] port: Option, + /// File or network path to repo #[getset(get_copy = "pub", set = "pub(crate)")] path: &'url str, - /// Include scheme:// when printing url + /// If we should print `scheme://` from input or derived during parsing #[getset(get_copy = "pub", set = "pub(crate)")] print_scheme: bool, + /// Pattern style of url derived during parsing #[getset(get_copy = "pub(crate)")] hint: GitUrlParseHint, } @@ -54,14 +80,19 @@ impl fmt::Display for GitUrl<'_> { } impl<'url> GitUrl<'url> { + /// Wrapper function for the default output mode via [`Display`](std::fmt::Display) trait fn display(&self) -> String { self.build_string(false) } + /// Wrapper function for printing a url for the [`url`](https://docs.rs/url/latest/url/) crate + #[cfg(feature = "url")] fn url_compat_display(&self) -> String { self.build_string(true) } + /// This method rebuilds the printable GitUrl from its components. 
+ /// `url_compat` results in output that can be parsed by the `url` crate fn build_string(&self, url_compat: bool) -> String { let scheme = if self.print_scheme() || url_compat { if let Some(scheme) = self.scheme() { @@ -73,10 +104,10 @@ impl<'url> GitUrl<'url> { String::new() }; - let auth_info = match (self.user(), self.token()) { - (Some(user), Some(token)) => format!("{user}:{token}@"), + let auth_info = match (self.user(), self.password()) { + (Some(user), Some(password)) => format!("{user}:{password}@"), (Some(user), None) => format!("{user}@",), - (None, Some(token)) => format!("{token}@"), + (None, Some(password)) => format!("{password}@"), (None, None) => String::new(), }; @@ -110,17 +141,27 @@ impl<'url> GitUrl<'url> { } impl<'url> GitUrl<'url> { - /// Returns `GitUrl` after removing `user` and `token` values - /// Intended use-case is for non-destructive printing GitUrl excluding any embedded auth info + /// Returns `GitUrl` after removing all user info values pub fn trim_auth(&self) -> GitUrl { let mut new_giturl = self.clone(); new_giturl.set_user(None); - new_giturl.set_token(None); - #[cfg(feature = "tracing")] - debug!(?new_giturl); + new_giturl.set_password(None); + #[cfg(feature = "log")] + debug!("{new_giturl:?}"); new_giturl } + /// Returns a `Result` after parsing `input` for metadata + /// + /// ``` + /// # use git_url_parse::GitUrl; + /// # use git_url_parse::types::provider::GenericProvider; + /// # fn main() -> Result<(), git_url_parse::GitUrlParseError> { + /// let http_url = GitUrl::parse("https://github.com/tjtelan/git-url-parse-rs.git")?; + /// let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?; + /// # Ok(()) + /// # } + /// ``` pub fn parse(input: &'url str) -> Result { // Error if there are null bytes within the url // https://github.com/tjtelan/git-url-parse-rs/issues/16 @@ -131,11 +172,11 @@ impl<'url> GitUrl<'url> { let (_input, url_spec_parser) = UrlSpecParser::parse(input).finish().unwrap_or_default(); 
let mut scheme = url_spec_parser.scheme(); - let user = url_spec_parser.heir_part().authority().userinfo().user(); - let token = url_spec_parser.heir_part().authority().userinfo().token(); - let host = url_spec_parser.heir_part().authority().host(); - let port = url_spec_parser.heir_part().authority().port(); - let mut path = url_spec_parser.heir_part().path(); + let user = url_spec_parser.hier_part().authority().userinfo().user(); + let password = url_spec_parser.hier_part().authority().userinfo().token(); + let host = url_spec_parser.hier_part().authority().host(); + let port = url_spec_parser.hier_part().authority().port(); + let mut path = url_spec_parser.hier_part().path(); // We will respect whether scheme was initially set let print_scheme = scheme.is_some(); @@ -152,15 +193,15 @@ impl<'url> GitUrl<'url> { } } } else if user.is_none() - && token.is_none() + && password.is_none() && host.is_none() && port.is_none() && !path.is_empty() { // if we only have a path => file GitUrlParseHint::Filelike - } else if user.is_some() && token.is_some() { - // If we have a user and token => http + } else if user.is_some() && password.is_some() { + // If we have a user and password => http GitUrlParseHint::Httplike } else if path.starts_with(':') { // If path starts with a colon => ssh @@ -191,7 +232,7 @@ impl<'url> GitUrl<'url> { let git_url = GitUrl { scheme, user, - token, + password, host, port, path, @@ -199,11 +240,23 @@ impl<'url> GitUrl<'url> { hint, }; - let _check = git_url.is_valid()?; + git_url.is_valid()?; Ok(git_url) } + /// ``` + /// use git_url_parse::GitUrl; + /// use git_url_parse::types::provider::GenericProvider; + /// + /// # fn main() -> Result<(), git_url_parse::GitUrlParseError> { + /// let ssh_url = GitUrl::parse("git@github.com:tjtelan/git-url-parse-rs.git")?; + /// let provider : GenericProvider = ssh_url.provider_info()?; + /// # assert_eq!(provider.owner(), "tjtelan"); + /// # assert_eq!(provider.repo(), "git-url-parse-rs"); + /// + /// # 
Ok(()) + /// # } pub fn provider_info(&self) -> Result where T: provider::GitProvider, GitUrlParseError>, @@ -211,62 +264,67 @@ impl<'url> GitUrl<'url> { T::from_git_url(self) } + /// This is called as the last step before returning a `GitUrl` to the user fn is_valid(&self) -> Result<(), GitUrlParseError> { // Last chance validation - //println!("{self:#?}"); + #[cfg(feature = "log")] + debug!("Validating parsing results {self:#?}"); if self.path().is_empty() { return Err(GitUrlParseError::InvalidPathEmpty); } - // There's an edge case we don't cover: ssh urls using ports + absolute paths + // There's an edge case we don't properly cover: ssh urls using ports + absolute paths // https://mslinn.com/git/040-git-urls.html - describes this pattern, if we decide to parse for it // only ssh paths start with ':' if self.hint() != GitUrlParseHint::Sshlike && self.path.starts_with(':') { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!("{}", self.hint()); - debug!(self.path); + debug!("{:?}", self.hint()); + debug!("{:?}", self.path()); debug!("Only sshlike url path starts with ':'"); - debug!("path starts with ':'?", self.path.starts_with(':')); + debug!("path starts with ':'? 
{}", self.path.starts_with(':')); } return Err(GitUrlParseError::InvalidPortNumber); } - // if we are not httplike, we shouldn't have tokens - if self.hint() != GitUrlParseHint::Httplike && self.token().is_some() { - #[cfg(feature = "tracing")] + // if we are not httplike, we shouldn't have passwords + if self.hint() != GitUrlParseHint::Httplike && self.password().is_some() { + #[cfg(feature = "log")] { - debug!("{}", self.hint()); - debug!("Token support only for httplike url", self.token()); + debug!("{:?}", self.hint()); + debug!("password support only for httplike url: {:?}", self.token()); } - return Err(GitUrlParseError::InvalidTokenUnsupported); + return Err(GitUrlParseError::InvalidPasswordUnsupported); } // if we are filelike, we should only have paths if self.hint() == GitUrlParseHint::Filelike && (self.user().is_some() - || self.token().is_some() + || self.password().is_some() || self.host().is_some() || self.port().is_some() || self.path().is_empty()) { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!( - "Only scheme and path expected to have values set for filelike urls", - ?self + "Only scheme and path expected to have values set for filelike urls {:?}", + self ); } return Err(GitUrlParseError::InvalidFilePattern); } - // Since we don't fully implement any spec, we'll rely on the url crate - println!("{:#?}", self.url_compat_display()); - let _u = url::Url::parse(&self.url_compat_display())?; + #[cfg(feature = "url")] + { + // Since we don't fully implement any spec, we'll rely on the url crate + println!("{:#?}", self.url_compat_display()); + let _u = url::Url::parse(&self.url_compat_display())?; + } Ok(()) } diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 1ed948f..f8e8006 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -1,3 +1,14 @@ +//! # Git URL Providers +//! +//! Provides extraction of Git host service info from `GitUrl`s. +//! +//! ## Supported Providers +//! +//! 
- [Generic Git repositories](crate::types::provider::GenericProvider) +//! - [Azure DevOps](crate::types::provider::AzureDevOpsProvider) +//! - [GitLab](crate::types::provider::GitLabProvider) +//! - Custom (via [`GitProvider`] trait) + use crate::types::GitUrlParseHint; use crate::{GitUrl, GitUrlParseError}; @@ -6,19 +17,77 @@ use nom::Parser; use nom::bytes::complete::{is_not, tag, take_until}; use nom::combinator::opt; use nom::sequence::{preceded, separated_pair, terminated}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +/// Secondary parser called by [`GitUrl::provider_info()`] to extract Git host provider info from url +/// +/// ``` +/// // Custom trait example +/// +/// use git_url_parse::{GitUrl, GitUrlParseError}; +/// use git_url_parse::types::provider::GitProvider; +/// +/// #[derive(Debug, Clone, PartialEq, Eq)] +/// struct MyCustomProvider; +/// +/// impl GitProvider, GitUrlParseError> for MyCustomProvider { +/// fn from_git_url(_url: &GitUrl) -> Result { +/// // Do your custom parsing here with your GitUrl +/// Ok(Self) +/// } +/// } +/// +/// let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; +/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +/// +/// // Provide your custom type to `GitUrl::provider_info()` +/// let provider_info: MyCustomProvider = parsed.provider_info().unwrap(); +/// let expected = MyCustomProvider; +/// assert_eq!(provider_info, expected) +/// ``` pub trait GitProvider: Clone + std::fmt::Debug { + /// Trait method called by `GitUrl::provider_info()` + /// + /// Logic for extracting service level information from a `GitUrl` fn from_git_url(url: &T) -> Result; } +/// Represents a generic Git repository provider +/// +/// ## Typical Use Cases +/// +/// - Common service hosting with `owner/repo` patterns (e.g. GitHub, Bitbucket) +/// - Self-hosted repositories (e.g. 
Codeberg, Gitea) +/// +/// Example: +/// +/// ``` +/// use git_url_parse::{GitUrl, GitUrlParseError}; +/// use git_url_parse::types::provider::GenericProvider; +/// +/// let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; +/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +/// +/// let provider_info: GenericProvider = parsed.provider_info().unwrap(); +/// +/// assert_eq!(provider_info.owner(), "tjtelan"); +/// assert_eq!(provider_info.repo(), "git-url-parse-rs"); +/// assert_eq!(provider_info.fullname(), "tjtelan/git-url-parse-rs"); +/// ``` +/// #[derive(Debug, PartialEq, Eq, Clone, CopyGetters)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[getset(get_copy = "pub")] pub struct GenericProvider<'a> { - pub owner: &'a str, - pub repo: &'a str, + /// Repo owner + owner: &'a str, + /// Repo name + repo: &'a str, } impl<'a> GenericProvider<'a> { + /// Parse the most common form of git url offered by git providers fn parse_path(input: &str) -> Result<(&str, GenericProvider), GitUrlParseError> { let (input, _) = opt(tag("/")).parse(input)?; let (input, (user, repo)) = @@ -26,6 +95,7 @@ impl<'a> GenericProvider<'a> { Ok((input, GenericProvider { owner: user, repo })) } + /// Helper method to get the full name of a repo: `{owner}/{repo}` pub fn fullname(&self) -> String { format!("{}/{}", self.owner, self.repo) } @@ -42,15 +112,48 @@ impl<'a> GitProvider, GitUrlParseError> for GenericProvider<'a> { } } +/// Azure DevOps repository provider +/// ## Supported URL Formats +/// +/// - `https://dev.azure.com/org/project/_git/repo` +/// - `git@ssh.dev.azure.com:v3/org/project/repo` +/// +/// Example: +/// +/// ``` +/// use git_url_parse::{GitUrl, GitUrlParseError}; +/// use git_url_parse::types::provider::AzureDevOpsProvider; +/// +/// let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; +/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +/// +/// let provider_info: 
AzureDevOpsProvider = parsed.provider_info().unwrap(); +/// +/// assert_eq!(provider_info.org(), "CompanyName"); +/// assert_eq!(provider_info.project(), "ProjectName"); +/// assert_eq!(provider_info.repo(), "RepoName"); +/// assert_eq!(provider_info.fullname(), "CompanyName/ProjectName/RepoName"); +/// ``` +/// #[derive(Debug, PartialEq, Eq, Clone, CopyGetters)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[getset(get_copy = "pub")] pub struct AzureDevOpsProvider<'a> { - pub org: &'a str, - pub project: &'a str, - pub repo: &'a str, + /// Azure Devops organization name + org: &'a str, + /// Azure Devops project name + project: &'a str, + /// Azure Devops repo name + repo: &'a str, } impl<'a> AzureDevOpsProvider<'a> { + /// Helper method to get the full name of a repo: `{org}/{project}/{repo}` + pub fn fullname(&self) -> String { + format!("{}/{}/{}", self.org, self.project, self.repo) + } + + /// Parse the path of a http url for Azure Devops patterns fn parse_http_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { // Handle optional leading / let (input, _) = opt(tag("/")).parse(input)?; @@ -70,6 +173,7 @@ impl<'a> AzureDevOpsProvider<'a> { Ok((input, AzureDevOpsProvider { org, project, repo })) } + /// Parse the path of an ssh url for Azure Devops patterns fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { // Handle optional leading v3/ or other prefix let (input, _) = opt(take_until("/")).parse(input)?; @@ -105,17 +209,69 @@ impl<'a> GitProvider, GitUrlParseError> for AzureDevOpsProvider<'a> { } } +/// ## GitLab repository provider +/// +/// ## Supported URL Formats +/// +/// - `https://gitlab.com/owner/repo.git` +/// - `https://gitlab.com/owner/subgroup1/subgroup2/repo.git` +/// - `git@gitlab.com:owner/repo.git` +/// - `git@gitlab.com:owner/subgroup1/subgroup2/repo.git` +/// +/// ## Examples +/// +/// ``` +/// use git_url_parse::GitUrl; +/// use 
git_url_parse::types::provider::GitLabProvider; +/// +/// fn main() -> Result<(), git_url_parse::GitUrlParseError> { +/// // Top-level repository +/// let url1 = GitUrl::parse("https://gitlab.com/gitlab-org/gitlab.git")?; +/// let provider1 : GitLabProvider = url1.provider_info()?; +/// assert_eq!(provider1.owner(), "gitlab-org"); +/// assert_eq!(provider1.repo(), "gitlab"); +/// assert_eq!(provider1.subgroup(), None); +/// assert_eq!(provider1.fullname(), "gitlab-org/gitlab"); +/// +/// // Repository with subgroups +/// let url2 = GitUrl::parse("https://gitlab.com/owner/group1/group2/project.git")?; +/// let provider2 : GitLabProvider = url2.provider_info()?; +/// assert_eq!(provider2.owner(), "owner"); +/// assert_eq!(provider2.repo(), "project"); +/// assert_eq!(provider2.subgroup(), Some(vec!["group1", "group2"])); +/// assert_eq!(provider2.fullname(), "owner/group1/group2/project"); +/// +/// Ok(()) +/// } +/// ``` +/// #[derive(Clone, Debug, PartialEq, Eq, Default, CopyGetters, CloneGetters)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct GitLabProvider<'a> { + /// Repo owner #[getset(get_copy = "pub")] - pub owner: &'a str, + owner: &'a str, + /// Gitlab subgroups #[getset(get_clone = "pub")] - pub subgroup: Option>, + subgroup: Option>, + /// Repo name #[getset(get_copy = "pub")] - pub repo: &'a str, + repo: &'a str, } impl<'a> GitLabProvider<'a> { + /// Helper method to get the full name of a repo: `{owner}/{repo}` or `{owner}/{subgroups}/{repo}` + pub fn fullname(&self) -> String { + if let Some(subgroup) = self.subgroup() { + let subgroup_str = subgroup.join("/"); + + format!("{}/{subgroup_str}/{}", self.owner, self.repo) + } else { + format!("{}/{}", self.owner, self.repo) + } + } + + /// Parse the path of url for GitLab patterns fn parse_path(input: &str) -> Result<(&str, GitLabProvider), GitUrlParseError> { // Optional leading slash let (input, _) = opt(tag("/")).parse(input)?; diff --git a/src/types/spec.rs 
b/src/types/spec.rs index 6f831e5..1b7decf 100644 --- a/src/types/spec.rs +++ b/src/types/spec.rs @@ -1,4 +1,11 @@ +//! # GitUrl url spec parser +//! +//! Internal structs with RFC 3986 parsing logic for Git urls +//! + use getset::CopyGetters; +#[cfg(feature = "log")] +use log::debug; use nom::Finish; use nom::branch::alt; use nom::bytes::complete::{tag, take_while}; @@ -9,11 +16,14 @@ use nom::multi::{many0, many1}; use nom::sequence::{pair, preceded, separated_pair, terminated}; use nom::{IResult, Parser, combinator::opt}; +/// Top-level struct for RFC 3986 spec parser #[derive(Debug, Default, Clone, Copy, CopyGetters)] #[getset(get_copy = "pub")] pub(crate) struct UrlSpecParser<'url> { + /// RFC 3986 scheme pub(crate) scheme: Option<&'url str>, - pub(crate) heir_part: UrlHeirPart<'url>, + /// RFC 3986 hier-part + pub(crate) hier_part: UrlHierPart<'url>, } impl<'url> UrlSpecParser<'url> { @@ -30,13 +40,17 @@ impl<'url> UrlSpecParser<'url> { let (input, scheme) = Self::parse_scheme.parse(input).finish().unwrap_or_default(); let (input, heir_part) = Self::parse_hier_part(input).finish().unwrap_or_default(); - let parsed = UrlSpecParser { scheme, heir_part }; + let parsed = UrlSpecParser { + scheme, + hier_part: heir_part, + }; Ok((input, parsed)) } + /// RFC 3986 scheme fn parse_scheme(input: &'url str) -> IResult<&'url str, Option<&'url str>> { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Looking ahead before parsing for scheme"); } @@ -59,15 +73,15 @@ impl<'url> UrlSpecParser<'url> { ); if check.parse(input).is_err() { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!("Look ahead check for scheme failed", ?self.token()); + debug!("Look ahead check for scheme failed"); } return Ok((input, None)); } - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Look ahead check passed, parsing for scheme"); } @@ -95,21 +109,22 @@ impl<'url> UrlSpecParser<'url> { ) .parse(input)?; - #[cfg(feature = "tracing")] + 
#[cfg(feature = "log")] { - debug!(?input); - debug!(?scheme); + debug!("{input:?}"); + debug!("{scheme:?}"); } Ok((input, scheme)) } + /// RFC 3986 hier-part // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 // The rfc says parsing the "//" part of the uri belongs to the hier-part parsing // but we only support common internet protocols, file paths, but not other "baseless" ones // so it is sensible for this move it with scheme parsing to support git user service urls - fn parse_hier_part(input: &'url str) -> IResult<&'url str, UrlHeirPart<'url>> { - #[cfg(feature = "tracing")] + fn parse_hier_part(input: &'url str) -> IResult<&'url str, UrlHierPart<'url>> { + #[cfg(feature = "log")] { debug!("Parsing for heir-part"); } @@ -130,19 +145,20 @@ impl<'url> UrlSpecParser<'url> { ) .parse(input)?; - let hier_part = UrlHeirPart { authority, path }; + let hier_part = UrlHierPart { authority, path }; - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!(?input); - debug!(?heir_part); + debug!("{:?}", input); + debug!("{:?}", hier_part); } Ok((input, hier_part)) } + /// RFC 3986 authority fn parse_authority(input: &'url str) -> IResult<&'url str, UrlAuthority<'url>> { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Parsing for Authority"); } @@ -151,7 +167,7 @@ impl<'url> UrlSpecParser<'url> { let (input, userinfo) = Self::parse_userinfo(input)?; // Host - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Looking ahead for windows-style path vs host"); } @@ -167,7 +183,7 @@ impl<'url> UrlSpecParser<'url> { .parse(input); if check.is_ok() { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!( "Host check failed. 
Found potential windows-style path while looking for host" @@ -177,7 +193,7 @@ impl<'url> UrlSpecParser<'url> { return Ok((input, UrlAuthority::default())); } - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Parsing for host"); } @@ -187,8 +203,8 @@ impl<'url> UrlSpecParser<'url> { opt(verify( recognize(take_while(|c: char| reg_name_uri_chars(c))), |s: &str| { - let has_alphanum = s.chars().any(is_alphanum); - let starts_with_alphanum = s.chars().next().is_some_and(is_alphanum); + let has_alphanum = s.chars().any(char::is_alphanumeric); + let starts_with_alphanum = s.chars().next().is_some_and(char::is_alphanumeric); has_alphanum && starts_with_alphanum && !s.is_empty() }, @@ -196,9 +212,9 @@ impl<'url> UrlSpecParser<'url> { ) .parse(input)?; - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!("host found", ?host); + debug!("host found: {host:?}"); } // Optional: port @@ -210,18 +226,19 @@ impl<'url> UrlSpecParser<'url> { port, }; - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!(?input); - debug!(?authority); + debug!("{input:?}"); + debug!("{authority:?}"); } Ok((input, authority)) } + /// RFC 3986 userinfo fn parse_userinfo(authority_input: &'url str) -> IResult<&'url str, UrlUserInfo<'url>> { // Peek for username@ - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Checking for for Userinfo"); } @@ -235,7 +252,7 @@ impl<'url> UrlSpecParser<'url> { ); if check.parse(authority_input).is_err() { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Userinfo check failed"); } @@ -255,7 +272,7 @@ impl<'url> UrlSpecParser<'url> { .parse(authority_input)?; let (authority_input, _) = if userinfo.is_some() { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Userinfo found. 
Parsing for '@'"); } @@ -269,7 +286,7 @@ impl<'url> UrlSpecParser<'url> { // Break down userinfo into user and token let (user, token) = if let Some(userinfo) = userinfo { if userinfo.contains(":") { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Continue break down userinfo into user:token"); } @@ -298,17 +315,18 @@ impl<'url> UrlSpecParser<'url> { let userinfo = UrlUserInfo { user, token }; - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!(?input); - debug!(?userinfo); + debug!("{authority_input:?}"); + debug!("{userinfo:?}"); } Ok((authority_input, userinfo)) } + /// RFC 3986 port fn parse_port(authority_input: &'url str) -> IResult<&'url str, Option> { - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { debug!("Parsing port"); } @@ -329,17 +347,16 @@ impl<'url> UrlSpecParser<'url> { ) .parse(authority_input)?; - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!(?input); - debug!(?port); + debug!("{authority_input:?}"); + debug!("{port:?}"); } Ok((input, port)) } - // This will get absolute paths. 
- // todo: test for empty and start with "//" + /// RFC 3986 path-abempty fn path_abempty_parser( ) -> impl Parser< &'url str, @@ -348,9 +365,9 @@ impl<'url> UrlSpecParser<'url> { >>::Output, Error = nom::error::Error<&'url str>, >{ - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!("parsing abempty path", ?path); + debug!("parsing abempty path"); } // Starts with '/' or empty @@ -363,6 +380,7 @@ impl<'url> UrlSpecParser<'url> { ) } + /// Not part of RFC 3986 - ssh-based url path fn path_ssh_parser( ) -> impl Parser< &'url str, @@ -371,9 +389,9 @@ impl<'url> UrlSpecParser<'url> { >>::Output, Error = nom::error::Error<&'url str>, >{ - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!("Parsing ssh path", ?path); + debug!("Parsing ssh path"); } context( @@ -386,6 +404,7 @@ impl<'url> UrlSpecParser<'url> { ) } + /// RFC 3986 path-rootless fn path_rootless_parser( ) -> impl Parser< &'url str, @@ -394,9 +413,9 @@ impl<'url> UrlSpecParser<'url> { >>::Output, Error = nom::error::Error<&'url str>, >{ - #[cfg(feature = "tracing")] + #[cfg(feature = "log")] { - debug!("Parsing rootless path", ?path); + debug!("Parsing rootless path"); } context( @@ -409,46 +428,56 @@ impl<'url> UrlSpecParser<'url> { } } +/// RFC 3986 userinfo #[derive(Debug, Default, Clone, Copy, CopyGetters)] #[getset(get_copy = "pub")] pub(crate) struct UrlUserInfo<'url> { + /// RFC 3986 Userinfo pub(crate) user: Option<&'url str>, + /// Non-spec, deprecated pub(crate) token: Option<&'url str>, } +/// RFC 3986 authority #[derive(Debug, Default, Clone, Copy, CopyGetters)] #[getset(get_copy = "pub")] pub(crate) struct UrlAuthority<'url> { + /// RFC 3986 Username, non-spec token pub(crate) userinfo: UrlUserInfo<'url>, + /// RFC 3986 Host pub(crate) host: Option<&'url str>, + /// RFC 3986 Port pub(crate) port: Option, } +/// RFC 3986 hier-part #[derive(Debug, Default, Clone, Copy, CopyGetters)] #[getset(get_copy = "pub")] -pub(crate) struct UrlHeirPart<'url> { +pub(crate) struct 
UrlHierPart<'url> { + /// RFC 3986 authority pub(crate) authority: UrlAuthority<'url>, + /// RFC 3986 relative-part pub(crate) path: &'url str, } +/// RFC 3986 pchar pub(crate) fn pchar_uri_chars(c: char) -> bool { // unreserved / pct-encoded (not implemented) / sub-delims / ":" / "@" unreserved_uri_chars(c) || subdelims_uri_chars(c) || c == ':' || c == '@' } +/// RFC 3986 reg-name pub(crate) fn reg_name_uri_chars(c: char) -> bool { // *( unreserved / pct-encoded (not implemented) / sub-delims ) unreserved_uri_chars(c) || subdelims_uri_chars(c) } +/// RFC 3986 unreserved pub(crate) fn unreserved_uri_chars(c: char) -> bool { - is_alphanum(c) || c == '-' || c == '.' || c == '_' || c == '~' -} - -pub(crate) fn is_alphanum(c: char) -> bool { - c.is_ascii_alphabetic() || c.is_ascii_digit() + c.is_alphanumeric() || c == '-' || c == '.' || c == '_' || c == '~' } +/// RFC 3986 sub-delims (mostly) pub(crate) fn subdelims_uri_chars(c: char) -> bool { c == '!' || c == '$' diff --git a/tests/parse.rs b/tests/parse.rs index 1ada913..c7bfc31 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -1,13 +1,17 @@ use git_url_parse::*; +use log::debug; + #[test] fn ssh_user_ports() { + let _ = env_logger::try_init(); let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), Some(9999)); assert_eq!(parsed.path(), "user/project-name.git"); @@ -16,13 +20,15 @@ fn ssh_user_ports() { #[test] fn ssh_no_scheme_no_user() { + let _ = env_logger::try_init(); let test_url = "host.tld:user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), 
test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), None); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "user/project-name.git"); @@ -32,13 +38,15 @@ fn ssh_no_scheme_no_user() { // Specific service support #[test] fn https_user_bitbucket() { + let _ = env_logger::try_init(); let test_url = "https://user@bitbucket.org/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("user")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("bitbucket.org")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/user/repo.git"); @@ -47,13 +55,15 @@ fn https_user_bitbucket() { #[test] fn ssh_user_bitbucket() { + let _ = env_logger::try_init(); let test_url = "git@bitbucket.org:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("bitbucket.org")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "user/repo.git"); @@ -62,13 +72,15 @@ fn ssh_user_bitbucket() { #[test] fn https_user_auth_bitbucket() { - let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git"; + let _ = env_logger::try_init(); + let test_url = "https://x-password-auth:token@bitbucket.org/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); - assert_eq!(parsed.user(), 
Some("x-token-auth")); - assert_eq!(parsed.token(), Some("token")); + assert_eq!(parsed.user(), Some("x-password-auth")); + assert_eq!(parsed.password(), Some("token")); assert_eq!(parsed.host(), Some("bitbucket.org")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/owner/name.git"); @@ -77,13 +89,15 @@ fn https_user_auth_bitbucket() { #[test] fn https_user_github() { + let _ = env_logger::try_init(); let test_url = "https://user@github.com/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("user")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/user/repo.git"); @@ -92,13 +106,15 @@ fn https_user_github() { #[test] fn ssh_user_github() { + let _ = env_logger::try_init(); let test_url = "git@github.com:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "user/repo.git"); @@ -107,13 +123,15 @@ fn ssh_user_github() { #[test] fn https_user_auth_github() { - let test_url = "https://token:x-oauth-basic@github.com/owner/name.git"; + let _ = env_logger::try_init(); + let test_url = "https://password:x-oauth-basic@github.com/owner/name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); - assert_eq!(parsed.user(), Some("token")); - assert_eq!(parsed.token(), 
Some("x-oauth-basic")); + assert_eq!(parsed.user(), Some("password")); + assert_eq!(parsed.password(), Some("x-oauth-basic")); assert_eq!(parsed.host(), Some("github.com")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/owner/name.git"); @@ -122,13 +140,15 @@ fn https_user_auth_github() { #[test] fn ssh_user_azure_devops() { + let _ = env_logger::try_init(); let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ssh")); assert_eq!(parsed.user(), Some("git")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("ssh.dev.azure.com")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "v3/CompanyName/ProjectName/RepoName"); @@ -137,13 +157,15 @@ fn ssh_user_azure_devops() { #[test] fn https_user_azure_devops() { + let _ = env_logger::try_init(); let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("https")); assert_eq!(parsed.user(), Some("organization")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("dev.azure.com")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/organization/project/_git/repo"); @@ -152,13 +174,15 @@ fn https_user_azure_devops() { #[test] fn ftp_user() { + let _ = env_logger::try_init(); let test_url = "ftp://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ftp")); assert_eq!(parsed.user(), Some("git")); - assert_eq!(parsed.token(), None); + 
assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/user/project-name.git"); @@ -167,13 +191,15 @@ fn ftp_user() { #[test] fn ftps_user() { + let _ = env_logger::try_init(); let test_url = "ftps://git@host.tld/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("ftps")); assert_eq!(parsed.user(), Some("git")); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), Some("host.tld")); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/user/project-name.git"); @@ -182,14 +208,15 @@ fn ftps_user() { #[test] fn relative_unix_path() { + let _ = env_logger::try_init(); let test_url = "../project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); - println! {"{parsed:#?}"}; assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "../project-name.git"); @@ -198,14 +225,15 @@ fn relative_unix_path() { #[test] fn absolute_unix_path() { + let _ = env_logger::try_init(); let test_url = "/path/to/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); - println! 
{"{parsed:#?}"}; assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "/path/to/project-name.git"); @@ -214,14 +242,15 @@ fn absolute_unix_path() { #[test] fn relative_windows_path() { + let _ = env_logger::try_init(); let test_url = r"..\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); - println! {"{parsed:#?}"}; assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), "..\\project-name.git"); @@ -230,13 +259,15 @@ fn relative_windows_path() { #[test] fn absolute_windows_path() { + let _ = env_logger::try_init(); let test_url = r"c:\project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); - println!("{parsed:#?}"); + debug!("{:#?}", parsed); + assert_eq!(parsed.to_string(), test_url); assert_eq!(parsed.scheme(), Some("file")); assert_eq!(parsed.user(), None); - assert_eq!(parsed.token(), None); + assert_eq!(parsed.password(), None); assert_eq!(parsed.host(), None); assert_eq!(parsed.port(), None); assert_eq!(parsed.path(), r"c:\project-name.git"); @@ -245,8 +276,10 @@ fn absolute_windows_path() { #[test] fn bad_port_1() { + let _ = env_logger::try_init(); let test_url = "https://github.com:crypto-browserify/browserify-rsa.git"; let e = GitUrl::parse(test_url); + debug!("{:#?}", e); assert!(e.is_err()); if let Err(err) = e { @@ -256,8 +289,10 @@ fn bad_port_1() { #[test] fn bad_port_2() { + let _ = env_logger::try_init(); let test_url = "https://example.org:7z"; let e = GitUrl::parse(test_url); + debug!("{:#?}", e); 
assert!(e.is_err()); if let Err(err) = e { @@ -267,8 +302,10 @@ fn bad_port_2() { #[test] fn port_out_of_range() { + let _ = env_logger::try_init(); let test_url = "https://example.org:70000"; let e = GitUrl::parse(test_url); + debug!("{:#?}", e); assert!(e.is_err()); if let Err(err) = e { @@ -278,8 +315,10 @@ fn port_out_of_range() { #[test] fn host_missing_1() { + let _ = env_logger::try_init(); let test_url = "https://:443"; let e = GitUrl::parse(test_url); + debug!("{:#?}", e); assert!(e.is_err()); if let Err(err) = e { @@ -289,8 +328,10 @@ fn host_missing_1() { #[test] fn host_missing_2() { + let _ = env_logger::try_init(); let test_url = "https://user:pass@"; let e = GitUrl::parse(test_url); + debug!("{:#?}", e); assert!(e.is_err()); if let Err(err) = e { @@ -301,8 +342,10 @@ fn host_missing_2() { // FIXME: This test does not throw the correct error #[test] fn host_invalid() { + let _ = env_logger::try_init(); let test_url = "foo://exa[mple.org/owner/repo.git"; let e = GitUrl::parse(test_url); + debug!("{:#?}", e); assert!(e.is_err()); } diff --git a/tests/provider.rs b/tests/provider.rs index b83fa2d..823601f 100644 --- a/tests/provider.rs +++ b/tests/provider.rs @@ -2,31 +2,44 @@ use git_url_parse::types::provider::{ AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider, }; use git_url_parse::{GitUrl, GitUrlParseError}; +use log::debug; #[test] fn http_generic_git() { + let _ = env_logger::try_init(); let test_url = "https://github.com/tjtelan/git-url-parse-rs.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GenericProvider = parsed.provider_info().unwrap(); - let expected = GenericProvider { - owner: "tjtelan", - repo: "git-url-parse-rs", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "tjtelan"; + let repo = "git-url-parse-rs"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + 
assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn ssh_generic_git() { + let _ = env_logger::try_init(); let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GenericProvider = parsed.provider_info().unwrap(); - let expected = GenericProvider { - owner: "tjtelan", - repo: "git-url-parse-rs", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "tjtelan"; + let repo = "git-url-parse-rs"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] @@ -39,117 +52,172 @@ fn custom_provider() { } } + let _ = env_logger::try_init(); let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: TestProvider = parsed.provider_info().unwrap(); + debug!("{:#?}", provider_info); + let expected = TestProvider; assert_eq!(provider_info, expected) } #[test] fn self_host() { + let _ = env_logger::try_init(); let test_url = "http://git.example.com:3000/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GenericProvider = parsed.provider_info().unwrap(); - let expected = GenericProvider { - owner: "user", - repo: "repo", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "user"; + let repo = "repo"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn http_azure_devops() { + let _ = env_logger::try_init(); let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; let 
parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); - let expected = AzureDevOpsProvider { - org: "CompanyName", - project: "ProjectName", - repo: "RepoName", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let org = "CompanyName"; + let project = "ProjectName"; + let repo = "RepoName"; + let full = format!("{org}/{project}/{repo}"); + + assert_eq!(provider_info.org(), org); + assert_eq!(provider_info.project(), project); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn ssh_azure_devops() { + let _ = env_logger::try_init(); let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); - let expected = AzureDevOpsProvider { - org: "CompanyName", - project: "ProjectName", - repo: "RepoName", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let org = "CompanyName"; + let project = "ProjectName"; + let repo = "RepoName"; + let full = format!("{org}/{project}/{repo}"); + + assert_eq!(provider_info.org(), org); + assert_eq!(provider_info.project(), project); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn http_gitlab() { + let _ = env_logger::try_init(); let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { - owner: "gitlab-org", - subgroup: None, - repo: "gitlab", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = None; + let repo = "gitlab"; + 
let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn ssh_gitlab() { + let _ = env_logger::try_init(); let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { - owner: "gitlab-org", - subgroup: None, - repo: "gitlab", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = None; + let repo = "gitlab"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn http_gitlab_subgroups() { + let _ = env_logger::try_init(); let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { - owner: "gitlab-org", - subgroup: Some(vec!["sbom", "systems"]), - repo: "gitlab-core", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = Some(vec!["sbom", "systems"]); + let repo = "gitlab-core"; + let full = format!("{owner}/{}/{repo}", "sbom/systems"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn ssh_gitlab_subgroups() { + let _ = env_logger::try_init(); let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; let parsed = 
GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: GitLabProvider = parsed.provider_info().unwrap(); - let expected = GitLabProvider { - owner: "gitlab-org", - subgroup: Some(vec!["sbom", "systems"]), - repo: "gitlab-core", - }; - assert_eq!(provider_info, expected) + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = Some(vec!["sbom", "systems"]); + let repo = "gitlab-core"; + let full = format!("{owner}/{}/{repo}", "sbom/systems"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); } #[test] fn filepath() { + let _ = env_logger::try_init(); let test_url = "file:///home/user/Documents/"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); let provider_info: Result = parsed.provider_info(); + debug!("{:#?}", provider_info); assert!(provider_info.is_err()); if let Err(e) = provider_info { diff --git a/tests/trim_auth.rs b/tests/trim_auth.rs index ed82d6b..5929b23 100644 --- a/tests/trim_auth.rs +++ b/tests/trim_auth.rs @@ -1,12 +1,16 @@ use git_url_parse::*; +use log::debug; #[test] fn ssh_user_ports() { + let _ = env_logger::try_init(); let test_url = "ssh://git@host.tld:9999/user/project-name.git"; let expected = "ssh://host.tld:9999/user/project-name.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } @@ -14,88 +18,112 @@ fn ssh_user_ports() { // Specific service support #[test] fn https_user_bitbucket() { + let _ = env_logger::try_init(); let test_url = "https://user@bitbucket.org/user/repo.git"; let expected = "https://bitbucket.org/user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", 
parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn ssh_user_bitbucket() { + let _ = env_logger::try_init(); let test_url = "git@bitbucket.org:user/repo.git"; let expected = "bitbucket.org:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn https_user_auth_bitbucket() { + let _ = env_logger::try_init(); let test_url = "https://x-token-auth:token@bitbucket.org/owner/name.git/"; let expected = "https://bitbucket.org/owner/name.git/"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn https_user_github() { + let _ = env_logger::try_init(); let test_url = "https://user@github.com/user/repo.git/"; let expected = "https://github.com/user/repo.git/"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn ssh_user_github() { + let _ = env_logger::try_init(); let test_url = "git@github.com:user/repo.git"; let expected = "github.com:user/repo.git"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn https_user_auth_github() { + let _ = env_logger::try_init(); let test_url = "https://token:x-oauth-basic@github.com/owner/name.git/"; let expected = "https://github.com/owner/name.git/"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn 
ssh_user_azure_devops() { + let _ = env_logger::try_init(); let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; let expected = "ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } #[test] fn https_user_azure_devops() { + let _ = env_logger::try_init(); let test_url = "https://organization@dev.azure.com/organization/project/_git/repo"; let expected = "https://dev.azure.com/organization/project/_git/repo"; let parsed = GitUrl::parse(test_url).expect("URL parse failed"); let trimmed = parsed.trim_auth(); + debug!("{:#?}", parsed); + debug!("{:#?}", trimmed); assert_eq!(trimmed.to_string(), expected); } From 66537940dd0f988eaf754406920186525848911f Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Fri, 12 Sep 2025 16:14:51 -0700 Subject: [PATCH 27/32] Update git cliff config Re-added support and test for short git scheme --- cliff.toml | 154 ++++++++++++++++++++------------------ src/lib.rs | 20 ++--- src/types/mod.rs | 2 +- src/types/provider/mod.rs | 14 ++-- src/types/spec.rs | 46 +++++++++++- tests/parse.rs | 18 +++++ 6 files changed, 163 insertions(+), 91 deletions(-) diff --git a/cliff.toml b/cliff.toml index 347c148..a4d290c 100644 --- a/cliff.toml +++ b/cliff.toml @@ -1,7 +1,9 @@ -# configuration file for git-cliff (0.1.0) +# git-cliff ~ configuration file +# https://git-cliff.org/docs/configuration [changelog] -# changelog header +# A Tera template to be rendered as the changelog's header. +# See https://keats.github.io/tera/docs/#introduction header = """ # Changelog\n All notable changes to this project will be documented in this file. @@ -9,88 +11,96 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n """ -# template for the changelog body -# https://tera.netlify.app/docs/#introduction +# A Tera template to be rendered for each release in the changelog. +# See https://keats.github.io/tera/docs/#introduction body = """ -{% if version %}\ - ## [{{ version | trim_start_matches(pat="v") }}](https://github.com/tjtelan/git-url-parse-rs/tree/{{version}}) - {{ timestamp | date(format="%Y-%m-%d") }} -{% else %}\ - ## [UNRELEASED] -{% endif %}\ - -{% for group, commits in commits - | filter(attribute="merge_commit", value=false) - | unique(attribute="message") - | group_by(attribute="group") %} +{% set package = "git-url-parse" %} +{%- macro remote_url() -%} + https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }} +{%- endmacro -%} + +{% if version -%} + ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} +{% else -%} + ## [Unreleased] +{% endif -%} + +{% for group, commits in commits | group_by(attribute="group") %} ### {{ group | upper_first }} - {% for commit in commits %} - - {{ commit.message | upper_first | split(pat="\n") | first }}\ + {%- for commit in commits %} + - {{ commit.message | split(pat="\n") | first | upper_first | trim }}\ + {% if commit.remote.username %} by @{{ commit.remote.username }}{%- endif -%} + {% if commit.remote.pr_number %} in \ + [#{{ commit.remote.pr_number }}]({{ self::remote_url() }}/pull/{{ commit.remote.pr_number }}) \ + {%- endif -%} {% endfor %} -{% endfor %}\n +{% endfor %} + +{%- if github.contributors | filter(attribute="is_first_time", value=true) | length != 0 %} + ### New Contributors +{%- endif -%} + +{% for contributor in github.contributors | filter(attribute="is_first_time", value=true) %} + * @{{ contributor.username }} made their first contribution + {%- if contributor.pr_number %} in \ + [#{{ 
contributor.pr_number }}]({{ self::remote_url() }}/pull/{{ contributor.pr_number }}) \ + {%- endif %} +{%- endfor %}\n\n """ -# remove the leading and trailing whitespace from the template -trim = true -# changelog footer +# A Tera template to be rendered as the changelog's footer. +# See https://keats.github.io/tera/docs/#introduction footer = """ +{%- macro remote_url() -%} + https://github.com/{{ remote.github.owner }}/{{ remote.github.repo }} +{%- endmacro -%} + +{% for release in releases -%} + {% if release.version -%} + {% if release.previous.version -%} + [{{ release.version | trim_start_matches(pat="v") }}]: \ + {{ self::remote_url() }}/compare/{{ release.previous.version }}..{{ release.version }} + {% endif -%} + {% else -%} + [unreleased]: {{ self::remote_url() }}/compare/{{ release.previous.version }}..HEAD + {% endif -%} + \n +{% endfor %} """ +# Remove leading and trailing whitespaces from the changelog's body. +trim = true [git] -# parse the commits based on https://www.conventionalcommits.org +# Parse commits according to the conventional commits specification. +# See https://www.conventionalcommits.org conventional_commits = true -# filter out the commits that are not conventional +# Exclude commits that do not match the conventional commits specification. filter_unconventional = false - +# An array of regex based parsers to modify commit messages prior to further processing. commit_preprocessors = [ - { pattern = "\\(#([0-9]+)\\)", replace = "([#${1}](https://github.com/tjtelan/git-url-parse-rs/issues/${1}))" }, + # Remove issue numbers. + { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "" }, ] - -# regex for parsing and grouping commits +# An array of regex based parsers for extracting data from the commit message. +# Assigns commits to groups. +# Optionally sets the commit's scope and can decide to exclude commits from further processing. 
commit_parsers = [ - - { message = ".*[Bb]ump", group = "Noise", skip = true }, - { message = ".*[Rr]evert", group = "Noise", skip = true }, - { message = ".*[Cl]ippy", group = "Noise", skip = true }, - { message = "^Merge pull request", group = "Noise", skip = true }, - - { message = "^test", group = "Fixed" }, - { message = "^.*[Ff]ix", group = "Fixed" }, - { message = "^[Rr]esolve", group = "Fixed" }, - - { message = "[Cc]ompile", group = "CI" }, - { message = "[Pp]ublish", group = "CI" }, - - { message = ".*[Dd]eprecate", group = "Removed" }, - { message = "^[Dd]isable", group = "Removed" }, - - { message = ".*[Aa]dd", group = "Added" }, - { message = ".*[Ss]upport", group = "Added" }, - { message = ".*[Mm]ake", group = "Added" }, - - { message = ".*[Rr]emove", group = "Removed" }, - { message = ".*[Dd]elete", group = "Removed" }, - { message = ".*[Dd]isable", group = "Removed" }, - - { message = "[Rr]elease", group = "CI" }, - { message = ".*[Ll]og", group = "CI" }, - { message = ".*[Bb]uild", group = "CI" }, - - { message = ".*[Uu]pdate", group = "Changed" }, - - { message = ".*[Cc]lean", group = "Other" }, - { message = ".*[Rr]efactor", group = "Other" }, - { message = "^.*", group = "Other" }, - + { message = "^[a|A]dd", group = "Added" }, + { message = "^[s|S]upport", group = "Added" }, + { message = "^[r|R]emove", group = "Removed" }, + { message = "^.*: add", group = "Added" }, + { message = "^.*: support", group = "Added" }, + { message = "^.*: remove", group = "Removed" }, + { message = "^.*: delete", group = "Removed" }, + { message = "^test", group = "Fixed" }, + { message = "^fix", group = "Fixed" }, + { message = "^.*: fix", group = "Fixed" }, + { message = "^.*", group = "Changed" }, ] -# filter out the commits that are not matched by commit parsers -filter_commits = true -# glob pattern for matching git tags -tag_pattern = "v[0-9]*" -# regex for skipping tags -skip_tags = ".*-rc|.*-alpha|.*-beta" -# regex for ignoring tags -ignore_tags = "" -# 
sort the tags chronologically -date_order = false -# sort the commits inside sections by oldest/newest order -sort_commits = "oldest" +# Exclude commits that are not matched by any commit parser. +filter_commits = false +# Order releases topologically instead of chronologically. +topo_order = false +# Order of commits in each group/release within the changelog. +# Allowed values: newest, oldest +sort_commits = "newest" \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 2386dee..f97944a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,7 @@ //! let provider : GenericProvider = ssh_url.provider_info()?; //! assert_eq!(provider.owner(), "tjtelan"); //! assert_eq!(provider.repo(), "git-url-parse-rs"); -//! +//! //! // Implement your own provider //! #[derive(Debug, Clone, PartialEq, Eq)] //! struct CustomProvider; @@ -56,7 +56,7 @@ //! Ok(Self) //! } //! } -//! +//! //! let custom_provider: CustomProvider = ssh_url.provider_info()?; //! let expected = CustomProvider; //! assert_eq!(custom_provider, expected); @@ -80,18 +80,18 @@ //! ```shell //! cargo add git-url-parse //! ``` -//! +//! //! ### Cargo Features -//! -//! #### `log` +//! +//! #### `log` //! Enable for internal `debug!` output from [log](https://docs.rs/log/latest) -//! #### `serde` +//! #### `serde` //! Enable for [serde](https://docs.rs/serde/latest/) `Serialize`/`Deserialize` on [`GitUrl`](crate::types::GitUrl) -//! #### `url` +//! #### `url` //! (**enabled by default**) -//! -//! Uses [url](https://docs.rs/url/latest/) during parsing for full url validation -//! +//! +//! Uses [url](https://docs.rs/url/latest/) during parsing for full url validation +//! 
pub mod types; diff --git a/src/types/mod.rs b/src/types/mod.rs index bebf9ca..c444b72 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -59,7 +59,7 @@ pub struct GitUrl<'url> { /// The port number of the repo host, if specified #[getset(get_copy = "pub")] port: Option, - /// File or network path to repo + /// File or network path to repo #[getset(get_copy = "pub", set = "pub(crate)")] path: &'url str, /// If we should print `scheme://` from input or derived during parsing diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index f8e8006..3176556 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -59,23 +59,23 @@ pub trait GitProvider: Clone + std::fmt::Debug { /// /// - Common service hosting with `owner/repo` patterns (e.g. GitHub, Bitbucket) /// - Self-hosted repositories (e.g. Codeberg, Gitea) -/// +/// /// Example: /// /// ``` /// use git_url_parse::{GitUrl, GitUrlParseError}; /// use git_url_parse::types::provider::GenericProvider; -/// +/// /// let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; /// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -/// +/// /// let provider_info: GenericProvider = parsed.provider_info().unwrap(); -/// +/// /// assert_eq!(provider_info.owner(), "tjtelan"); /// assert_eq!(provider_info.repo(), "git-url-parse-rs"); /// assert_eq!(provider_info.fullname(), "tjtelan/git-url-parse-rs"); /// ``` -/// +/// #[derive(Debug, PartialEq, Eq, Clone, CopyGetters)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[getset(get_copy = "pub")] @@ -134,7 +134,7 @@ impl<'a> GitProvider, GitUrlParseError> for GenericProvider<'a> { /// assert_eq!(provider_info.repo(), "RepoName"); /// assert_eq!(provider_info.fullname(), "CompanyName/ProjectName/RepoName"); /// ``` -/// +/// #[derive(Debug, PartialEq, Eq, Clone, CopyGetters)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[getset(get_copy = "pub")] @@ -244,7 +244,7 @@ impl<'a> GitProvider, 
GitUrlParseError> for AzureDevOpsProvider<'a> { /// Ok(()) /// } /// ``` -/// +/// #[derive(Clone, Debug, PartialEq, Eq, Default, CopyGetters, CloneGetters)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct GitLabProvider<'a> { diff --git a/src/types/spec.rs b/src/types/spec.rs index 1b7decf..02a85f2 100644 --- a/src/types/spec.rs +++ b/src/types/spec.rs @@ -72,12 +72,19 @@ impl<'url> UrlSpecParser<'url> { )), ); + // Check if we have scheme 'git:' without the '//' for normalizing to 'git://' + if Self::short_git_scheme_check(input) { + // return early if we are normalizing 'git:' (short git) + if let Ok((input, scheme)) = Self::short_git_scheme_parser().parse(input) { + return Ok((input, scheme)); + } + } + if check.parse(input).is_err() { #[cfg(feature = "log")] { debug!("Look ahead check for scheme failed"); } - return Ok((input, None)); } @@ -426,6 +433,43 @@ impl<'url> UrlSpecParser<'url> { )), ) } + + /// consuming parser for `git:` (short git) as scheme for normalizing + fn short_git_scheme_parser() -> impl Parser< + &'url str, + Output = , + Error = nom::error::Error<&'url str>, + > as Parser<&'url str>>::Output, + Error = nom::error::Error<&'url str>, + > { + #[cfg(feature = "log")] + { + debug!("Parsing short git scheme"); + } + + context( + "short git scheme parse", + opt(terminated( + tag::<&str, &str, nom::error::Error<&str>>("git"), + tag::<&str, &str, nom::error::Error<&str>>(":"), + )), + ) + } + + /// Non-consuming check for `git:` (short git) as scheme for normalizing + fn short_git_scheme_check(input: &'url str) -> bool { + context( + "short git validate", + peek(terminated( + tag::<&str, &str, nom::error::Error<&str>>("git"), + tag::<&str, &str, nom::error::Error<&str>>(":"), + )), + ) + .parse(input) + .is_ok() + } } /// RFC 3986 userinfo diff --git a/tests/parse.rs b/tests/parse.rs index c7bfc31..08782eb 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -349,3 +349,21 @@ fn host_invalid() { assert!(e.is_err()); } + 
+#[test] +fn short_git() { + let _ = env_logger::try_init(); + let test_url = "git:github.com/owner/name.git"; + let expected_url = "git://github.com/owner/name.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + assert_eq!(parsed.to_string(), expected_url); + assert_eq!(parsed.scheme(), Some("git")); + assert_eq!(parsed.user(), None); + assert_eq!(parsed.password(), None); + assert_eq!(parsed.host(), Some("github.com")); + assert_eq!(parsed.port(), None); + assert_eq!(parsed.path(), "/owner/name.git"); + assert_eq!(parsed.print_scheme(), true); +} From 780d61415787ebd58cd063051487aa4ebb92993f Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Fri, 12 Sep 2025 17:47:05 -0700 Subject: [PATCH 28/32] Update readme and msrv --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0558388..45e6d66 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Crates.io](https://img.shields.io/crates/v/git-url-parse)](https://crates.io/crates/git-url-parse) -![Crates.io MSRV](https://img.shields.io/crates/msrv/git-url-parse?label=rust-version) -[![Crates.io Total Downloads](https://img.shields.io/crates/d/git-url-parse?label=crates.io)](https://crates.io/crates/git-url-parse) +[![Crates.io Total Downloads](https://img.shields.io/crates/d/git-url-parse?label=Crates.io%20Downloads)](https://crates.io/crates/git-url-parse) +![Crates.io MSRV](https://img.shields.io/crates/msrv/git-url-parse?label=Min%20Supported%20Rust%20version) [![Github actions CI status](https://github.com/tjtelan/git-url-parse-rs/actions/workflows/ci.yml/badge.svg)](https://github.com/tjtelan/git-url-parse-rs/actions/workflows/ci.yml) [![docs.rs](https://docs.rs/git-url-parse/badge.svg)](https://docs.rs/git-url-parse/) [![License](https://img.shields.io/github/license/tjtelan/git-url-parse-rs)](LICENSE) From 03ff53911fd9b3a9039fec2c78bb21392cdf8a73 Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 12 Sep 2025 19:55:41 -0700 Subject: [PATCH 29/32] Fix msrv --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 79e46ea..d97898b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ name = "git-url-parse" readme = "README.md" repository = "https://github.com/tjtelan/git-url-parse-rs" version = "0.4.6" -rust-version = "1.82" +rust-version = "1.85" [features] default = ["url"] From 4cffa1ac72c571139d9a337548272f23426a3ccf Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Fri, 12 Sep 2025 20:03:09 -0700 Subject: [PATCH 30/32] Fix build error in feature --- src/types/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/types/mod.rs b/src/types/mod.rs index c444b72..adbe307 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -296,7 +296,10 @@ impl<'url> GitUrl<'url> { #[cfg(feature = "log")] { debug!("{:?}", self.hint()); - debug!("password support only for httplike url: {:?}", self.token()); + debug!( + "password support only for httplike url: {:?}", + self.password() + ); } return Err(GitUrlParseError::InvalidPasswordUnsupported); } From 792273c68143c9fd0df1b8e79acd31e1c225995f Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Fri, 12 Sep 2025 23:25:03 -0700 Subject: [PATCH 31/32] Update dev-dependencies with version numbers --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d97898b..0217c03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,5 +30,5 @@ log = { version = "0.4", optional = true } url = { version = "2.5", optional = true } [dev-dependencies] -env_logger = "*" -log = "*" +env_logger = "0.11" +log = "0.4" From 1d3c6b8894ebf5d1ecd9b4fef0a8c3c5c2c5b9ae Mon Sep 17 00:00:00 2001 From: "T.J. 
Telan" Date: Fri, 12 Sep 2025 23:45:26 -0700 Subject: [PATCH 32/32] Cleanup in Cargo.toml --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0217c03..272ead7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["T.J. Telan "] categories = ["parser-implementations"] -description = "A parser for git repo urls" +description = "A parser for urls used by git" documentation = "https://docs.rs/git-url-parse" edition = "2024" keywords = ["git", "url", "parser"] @@ -14,7 +14,7 @@ rust-version = "1.85" [features] default = ["url"] -# Enable serde +# Enable Serialize/Deserialize on structs with `serde` crate serde = ["dep:serde"] # Enable debugging logging with `log` crate log = ["dep:log"] @@ -23,7 +23,7 @@ url = ["dep:url"] [dependencies] nom = "8" -getset = "0.1.6" +getset = "0.1" thiserror = "2" serde = { version = "1", features = ["derive"], optional = true } log = { version = "0.4", optional = true }