From 1dcc1989eb4e1efd2da16ad00b4a6f53e3754c93 Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Wed, 17 Sep 2025 23:00:19 -0700 Subject: [PATCH 1/3] Add parse_to_url, TryFrom and GitProvider for Url --- src/types/mod.rs | 36 +++- src/types/provider/azure_devops.rs | 139 +++++++++++++ src/types/provider/generic.rs | 93 +++++++++ src/types/provider/gitlab.rs | 144 +++++++++++++ src/types/provider/mod.rs | 319 +---------------------------- 5 files changed, 417 insertions(+), 314 deletions(-) create mode 100644 src/types/provider/azure_devops.rs create mode 100644 src/types/provider/generic.rs create mode 100644 src/types/provider/gitlab.rs diff --git a/src/types/mod.rs b/src/types/mod.rs index f0189c1..87889c2 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -196,6 +196,24 @@ impl TryFrom for Url { } } +#[cfg(feature = "url")] +impl TryFrom<&Url> for GitUrl { + type Error = GitUrlParseError; + fn try_from(value: &Url) -> Result { + // Since we don't fully implement any spec, we'll rely on the url crate + GitUrl::parse(value.as_str()) + } +} + +#[cfg(feature = "url")] +impl TryFrom for GitUrl { + type Error = GitUrlParseError; + fn try_from(value: Url) -> Result { + // Since we don't fully implement any spec, we'll rely on the url crate + GitUrl::parse(value.as_str()) + } +} + impl GitUrl { /// Returns `GitUrl` after removing all user info values pub fn trim_auth(&self) -> GitUrl { @@ -219,8 +237,16 @@ impl GitUrl { /// # } /// ``` pub fn parse(input: &str) -> Result { - let mut git_url_result = GitUrl::default(); + let git_url = Self::parse_to_git_url(input)?; + + git_url.is_valid()?; + Ok(git_url) + } + + /// Internal parse to `GitUrl` without further validation + fn parse_to_git_url(input: &str) -> Result { + let mut git_url_result = GitUrl::default(); // Error if there are null bytes within the url // https://github.com/tjtelan/git-url-parse-rs/issues/16 if input.contains('\0') { @@ -294,6 +320,14 @@ impl GitUrl { Ok(git_url_result) } + /// Normalize input into form that can be used by [`Url::parse`](https://docs.rs/url/latest/url/struct.Url.html#method.parse) + #[cfg(feature = "url")] + pub fn parse_to_url(input: &str) -> Result { + let git_url = Self::parse_to_git_url(input)?; + + Ok(Url::try_from(git_url)?) + } + /// ``` /// use git_url_parse::GitUrl; /// use git_url_parse::types::provider::GenericProvider; diff --git a/src/types/provider/azure_devops.rs b/src/types/provider/azure_devops.rs new file mode 100644 index 0000000..d818d6a --- /dev/null +++ b/src/types/provider/azure_devops.rs @@ -0,0 +1,139 @@ +use super::GitProvider; +use crate::types::GitUrlParseHint; +use crate::{GitUrl, GitUrlParseError}; + +use getset::Getters; +use nom::Parser; +use nom::bytes::complete::{is_not, tag, take_until}; +use nom::combinator::opt; +use nom::sequence::{preceded, separated_pair, terminated}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "url")] +use url::Url; + +/// Azure DevOps repository provider +/// ## Supported URL Formats +/// +/// - `https://dev.azure.com/org/project/_git/repo` +/// - `git@ssh.dev.azure.com:v3/org/project/repo` +/// +/// Example: +/// +/// ``` +/// use git_url_parse::{GitUrl, GitUrlParseError}; +/// use git_url_parse::types::provider::AzureDevOpsProvider; +/// +/// let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; +/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +/// +/// let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); +/// +/// assert_eq!(provider_info.org(), "CompanyName"); +/// assert_eq!(provider_info.project(), "ProjectName"); +/// assert_eq!(provider_info.repo(), "RepoName"); +/// assert_eq!(provider_info.fullname(), "CompanyName/ProjectName/RepoName"); +/// ``` +/// +#[derive(Debug, PartialEq, Eq, Clone, Getters)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[getset(get = "pub")] +pub struct AzureDevOpsProvider { + /// Azure Devops organization name + org: String, + /// Azure Devops project name + project: String, + /// Azure Devops repo name + repo: String, +} + +impl AzureDevOpsProvider { + /// Helper method to get the full name of a repo: `{org}/{project}/{repo}` + pub fn fullname(&self) -> String { + format!("{}/{}/{}", self.org, self.project, self.repo) + } + + /// Parse the path of a http url for Azure Devops patterns + fn parse_http_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { + // Handle optional leading / + let (input, _) = opt(tag("/")).parse(input)?; + + // Parse org/project/repo + let (input, (org, (project, repo))) = separated_pair( + is_not("/"), + tag("/"), + separated_pair( + is_not("/"), + tag("/"), + preceded(opt(tag("_git/")), is_not("")), + ), + ) + .parse(input)?; + + Ok(( + input, + AzureDevOpsProvider { + org: org.to_string(), + project: project.to_string(), + repo: repo.to_string(), + }, + )) + } + + /// Parse the path of an ssh url for Azure Devops patterns + fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { + // Handle optional leading v3/ or other prefix + let (input, _) = opt(take_until("/")).parse(input)?; + let (input, _) = opt(tag("/")).parse(input)?; + + // Parse org/project/repo + let (input, (org, (project, repo))) = separated_pair( + is_not("/"), + tag("/"), + separated_pair( + is_not("/"), + tag("/"), + terminated(is_not("."), opt(tag(".git"))), + ), + ) + .parse(input)?; + + Ok(( + input, + AzureDevOpsProvider { + org: org.to_string(), + project: project.to_string(), + repo: repo.to_string(), + }, + )) + } +} + +impl GitProvider for AzureDevOpsProvider { + fn from_git_url(url: &GitUrl) -> Result { + let path = url.path(); + + let parsed = if url.hint() == GitUrlParseHint::Httplike { + Self::parse_http_path(path) + } else { + Self::parse_ssh_path(path) + }; + + parsed.map(|(_, provider)| provider) + } +} + +#[cfg(feature = "url")] +impl GitProvider for AzureDevOpsProvider { + fn from_git_url(url: &Url) -> Result { + let path = url.path(); + + let parsed = if url.scheme().contains("http") { + Self::parse_http_path(path) + } else { + Self::parse_ssh_path(path) + }; + + parsed.map(|(_, provider)| provider) + } +} diff --git a/src/types/provider/generic.rs b/src/types/provider/generic.rs new file mode 100644 index 0000000..b725d20 --- /dev/null +++ b/src/types/provider/generic.rs @@ -0,0 +1,93 @@ +use super::GitProvider; +use crate::types::GitUrlParseHint; +use crate::{GitUrl, GitUrlParseError}; + +use getset::Getters; +use nom::Parser; +use nom::bytes::complete::{is_not, tag, take_until}; +use nom::combinator::opt; +use nom::sequence::separated_pair; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "url")] +use url::Url; + +/// Represents a generic Git repository provider +/// +/// ## Typical Use Cases +/// +/// - Common service hosting with `owner/repo` patterns (e.g. GitHub, Bitbucket) +/// - Self-hosted repositories (e.g. Codeberg, Gitea) +/// +/// Example: +/// +/// ``` +/// use git_url_parse::{GitUrl, GitUrlParseError}; +/// use git_url_parse::types::provider::GenericProvider; +/// +/// let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; +/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); +/// +/// let provider_info: GenericProvider = parsed.provider_info().unwrap(); +/// +/// assert_eq!(provider_info.owner(), "tjtelan"); +/// assert_eq!(provider_info.repo(), "git-url-parse-rs"); +/// assert_eq!(provider_info.fullname(), "tjtelan/git-url-parse-rs"); +/// ``` +/// +#[derive(Debug, PartialEq, Eq, Clone, Getters)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[getset(get = "pub")] +pub struct GenericProvider { + /// Repo owner + owner: String, + /// Repo name + repo: String, +} + +impl GenericProvider { + /// Parse the most common form of git url by offered by git providers + fn parse_path(input: &str) -> Result<(&str, GenericProvider), GitUrlParseError> { + let (input, _) = opt(tag("/")).parse(input)?; + let (input, (user, repo)) = if input.ends_with(".git") { + separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input)? + } else { + separated_pair(is_not("/"), tag("/"), is_not("/")).parse(input)? + }; + Ok(( + input, + GenericProvider { + owner: user.to_string(), + repo: repo.to_string(), + }, + )) + } + + /// Helper method to get the full name of a repo: `{owner}/{repo}` + pub fn fullname(&self) -> String { + format!("{}/{}", self.owner, self.repo) + } +} + +impl GitProvider for GenericProvider { + fn from_git_url(url: &GitUrl) -> Result { + if url.hint() == GitUrlParseHint::Filelike { + return Err(GitUrlParseError::ProviderUnsupported); + } + + let path = url.path(); + Self::parse_path(path).map(|(_, provider)| provider) + } +} + +#[cfg(feature = "url")] +impl GitProvider for GenericProvider { + fn from_git_url(url: &Url) -> Result { + if url.scheme() == "file" { + return Err(GitUrlParseError::ProviderUnsupported); + } + + let path = url.path(); + Self::parse_path(path).map(|(_, provider)| provider) + } +} diff --git a/src/types/provider/gitlab.rs b/src/types/provider/gitlab.rs new file mode 100644 index 0000000..737d7d5 --- /dev/null +++ b/src/types/provider/gitlab.rs @@ -0,0 +1,144 @@ +use super::GitProvider; +use crate::{GitUrl, GitUrlParseError}; + +use getset::{CloneGetters, Getters}; +use nom::Parser; +use nom::bytes::complete::tag; +use nom::combinator::opt; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "url")] +use url::Url; + +/// ## GitLab repository provider +/// +/// ## Supported URL Formats +/// +/// - `https://gitlab.com/owner/repo.git` +/// - `https://gitlab.com/owner/subgroup1/subgroup2/repo.git` +/// - `git@gitlab.com:owner/repo.git` +/// - `git@gitlab.com:owner/subgroup1/subgroup2/repo.git` +/// +/// ## Examples +/// +/// ``` +/// use git_url_parse::GitUrl; +/// use git_url_parse::types::provider::GitLabProvider; +/// +/// fn main() -> Result<(), git_url_parse::GitUrlParseError> { +/// // Top-level repository +/// let url1 = GitUrl::parse("https://gitlab.com/gitlab-org/gitlab.git")?; +/// let provider1 : GitLabProvider = url1.provider_info()?; +/// assert_eq!(provider1.owner(), "gitlab-org"); +/// assert_eq!(provider1.repo(), "gitlab"); +/// assert_eq!(provider1.subgroup(), None); +/// assert_eq!(provider1.fullname(), "gitlab-org/gitlab"); +/// +/// // Repository with subgroups +/// let url2 = GitUrl::parse("https://gitlab.com/owner/group1/group2/project.git")?; +/// let provider2 : GitLabProvider = url2.provider_info()?; +/// assert_eq!(provider2.owner(), "owner"); +/// assert_eq!(provider2.repo(), "project"); +/// assert_eq!(provider2.subgroup(), Some(vec!["group1", "group2"])); +/// assert_eq!(provider2.fullname(), "owner/group1/group2/project"); +/// +/// Ok(()) +/// } +/// ``` +/// +#[derive(Clone, Debug, PartialEq, Eq, Default, Getters, CloneGetters)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct GitLabProvider { + /// Repo owner + #[getset(get = "pub")] + owner: String, + /// Gitlab subgroups + //#[getset(get_clone = "pub")] + subgroup: Option>, + /// Repo name + #[getset(get = "pub")] + repo: String, +} + +impl GitLabProvider { + /// Repo owner + /// Gitlab subgroups + pub fn subgroup(&self) -> Option> { + if let Some(s) = &self.subgroup { + let subgroup_vec: Vec<&str> = s.iter().map(|s| s.as_str()).collect(); + Some(subgroup_vec) + } else { + None + } + } + + /// Helper method to get the full name of a repo: `{owner}/{repo}` or `{owner}/{subgroups}/{repo}` + pub fn fullname(&self) -> String { + if let Some(subgroup) = self.subgroup() { + let subgroup_str = subgroup.join("/"); + + format!("{}/{subgroup_str}/{}", self.owner, self.repo) + } else { + format!("{}/{}", self.owner, self.repo) + } + } + + /// Parse the path of url for GitLab patterns + fn parse_path(input: &str) -> Result<(&str, GitLabProvider), GitUrlParseError> { + // Optional leading slash + let (input, _) = opt(tag("/")).parse(input)?; + + // Remove .git extension if present + let input = input.trim_end_matches(".git"); + + // Split the path + let parts: Vec<&str> = input.split('/').filter(|s| !s.is_empty()).collect(); + + // Ensure we have at least 2 parts (owner and repo) + if parts.len() < 2 { + return Err(GitUrlParseError::ProviderParseFail( + "Path needs at least 2 parts: ex. \'/owner/repo\'".into(), + )); + } + + // Last part is the repo + let repo = parts[parts.len() - 1].to_string(); + + // Everything before the last part is the owner/subgroups + let (owner, subgroup) = if parts.len() > 2 { + let subgroup: Vec = parts[1..(parts.len() - 1)] + .iter() + .copied() + .map(|s| s.to_string()) + .collect(); + + (parts[0].to_string(), Some(subgroup)) + } else { + (parts[0].to_string(), None) + }; + + Ok(( + input, + GitLabProvider { + owner, + subgroup, + repo, + }, + )) + } +} + +impl GitProvider for GitLabProvider { + fn from_git_url(url: &GitUrl) -> Result { + let path = url.path(); + Self::parse_path(path).map(|(_, provider)| provider) + } +} + +#[cfg(feature = "url")] +impl GitProvider for GitLabProvider { + fn from_git_url(url: &Url) -> Result { + let path = url.path(); + Self::parse_path(path).map(|(_, provider)| provider) + } +} diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index 309da1d..c130631 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -9,16 +9,13 @@ //! - [GitLab](crate::types::provider::GitLabProvider) //! - Custom (via [`GitProvider`] trait) -use crate::types::GitUrlParseHint; -use crate::{GitUrl, GitUrlParseError}; +mod azure_devops; +mod generic; +mod gitlab; -use getset::{CloneGetters, Getters}; -use nom::Parser; -use nom::bytes::complete::{is_not, tag, take_until}; -use nom::combinator::opt; -use nom::sequence::{preceded, separated_pair, terminated}; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; +pub use azure_devops::AzureDevOpsProvider; +pub use generic::GenericProvider; +pub use gitlab::GitLabProvider; /// Secondary parser called by [`GitUrl::provider_info()`] to extract Git host provider info from url /// @@ -52,307 +49,3 @@ pub trait GitProvider: Clone + std::fmt::Debug { /// Logic for extracting service level information from a `GitUrl` fn from_git_url(url: &T) -> Result; } - -/// Represents a generic Git repository provider -/// -/// ## Typical Use Cases -/// -/// - Common service hosting with `owner/repo` patterns (e.g. GitHub, Bitbucket) -/// - Self-hosted repositories (e.g. Codeberg, Gitea) -/// -/// Example: -/// -/// ``` -/// use git_url_parse::{GitUrl, GitUrlParseError}; -/// use git_url_parse::types::provider::GenericProvider; -/// -/// let test_url = "git@github.com:tjtelan/git-url-parse-rs.git"; -/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -/// -/// let provider_info: GenericProvider = parsed.provider_info().unwrap(); -/// -/// assert_eq!(provider_info.owner(), "tjtelan"); -/// assert_eq!(provider_info.repo(), "git-url-parse-rs"); -/// assert_eq!(provider_info.fullname(), "tjtelan/git-url-parse-rs"); -/// ``` -/// -#[derive(Debug, PartialEq, Eq, Clone, Getters)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[getset(get = "pub")] -pub struct GenericProvider { - /// Repo owner - owner: String, - /// Repo name - repo: String, -} - -impl GenericProvider { - /// Parse the most common form of git url by offered by git providers - fn parse_path(input: &str) -> Result<(&str, GenericProvider), GitUrlParseError> { - let (input, _) = opt(tag("/")).parse(input)?; - let (input, (user, repo)) = if input.ends_with(".git") { - separated_pair(is_not("/"), tag("/"), take_until(".git")).parse(input)? - } else { - separated_pair(is_not("/"), tag("/"), is_not("/")).parse(input)? - }; - Ok(( - input, - GenericProvider { - owner: user.to_string(), - repo: repo.to_string(), - }, - )) - } - - /// Helper method to get the full name of a repo: `{owner}/{repo}` - pub fn fullname(&self) -> String { - format!("{}/{}", self.owner, self.repo) - } -} - -impl GitProvider for GenericProvider { - fn from_git_url(url: &GitUrl) -> Result { - if url.hint() == GitUrlParseHint::Filelike { - return Err(GitUrlParseError::ProviderUnsupported); - } - - let path = url.path(); - Self::parse_path(path).map(|(_, provider)| provider) - } -} - -/// Azure DevOps repository provider -/// ## Supported URL Formats -/// -/// - `https://dev.azure.com/org/project/_git/repo` -/// - `git@ssh.dev.azure.com:v3/org/project/repo` -/// -/// Example: -/// -/// ``` -/// use git_url_parse::{GitUrl, GitUrlParseError}; -/// use git_url_parse::types::provider::AzureDevOpsProvider; -/// -/// let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; -/// let parsed = GitUrl::parse(test_url).expect("URL parse failed"); -/// -/// let provider_info: AzureDevOpsProvider = parsed.provider_info().unwrap(); -/// -/// assert_eq!(provider_info.org(), "CompanyName"); -/// assert_eq!(provider_info.project(), "ProjectName"); -/// assert_eq!(provider_info.repo(), "RepoName"); -/// assert_eq!(provider_info.fullname(), "CompanyName/ProjectName/RepoName"); -/// ``` -/// -#[derive(Debug, PartialEq, Eq, Clone, Getters)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[getset(get = "pub")] -pub struct AzureDevOpsProvider { - /// Azure Devops organization name - org: String, - /// Azure Devops project name - project: String, - /// Azure Devops repo name - repo: String, -} - -impl AzureDevOpsProvider { - /// Helper method to get the full name of a repo: `{org}/{project}/{repo}` - pub fn fullname(&self) -> String { - format!("{}/{}/{}", self.org, self.project, self.repo) - } - - /// Parse the path of a http url for Azure Devops patterns - fn parse_http_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { - // Handle optional leading / - let (input, _) = opt(tag("/")).parse(input)?; - - // Parse org/project/repo - let (input, (org, (project, repo))) = separated_pair( - is_not("/"), - tag("/"), - separated_pair( - is_not("/"), - tag("/"), - preceded(opt(tag("_git/")), is_not("")), - ), - ) - .parse(input)?; - - Ok(( - input, - AzureDevOpsProvider { - org: org.to_string(), - project: project.to_string(), - repo: repo.to_string(), - }, - )) - } - - /// Parse the path of an ssh url for Azure Devops patterns - fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { - // Handle optional leading v3/ or other prefix - let (input, _) = opt(take_until("/")).parse(input)?; - let (input, _) = opt(tag("/")).parse(input)?; - - // Parse org/project/repo - let (input, (org, (project, repo))) = separated_pair( - is_not("/"), - tag("/"), - separated_pair( - is_not("/"), - tag("/"), - terminated(is_not("."), opt(tag(".git"))), - ), - ) - .parse(input)?; - - Ok(( - input, - AzureDevOpsProvider { - org: org.to_string(), - project: project.to_string(), - repo: repo.to_string(), - }, - )) - } -} - -impl GitProvider for AzureDevOpsProvider { - fn from_git_url(url: &GitUrl) -> Result { - let path = url.path(); - - let parsed = if url.hint() == GitUrlParseHint::Httplike { - Self::parse_http_path(path) - } else { - Self::parse_ssh_path(path) - }; - - parsed.map(|(_, provider)| provider) - } -} - -/// ## GitLab repository provider -/// -/// ## Supported URL Formats -/// -/// - `https://gitlab.com/owner/repo.git` -/// - `https://gitlab.com/owner/subgroup1/subgroup2/repo.git` -/// - `git@gitlab.com:owner/repo.git` -/// - `git@gitlab.com:owner/subgroup1/subgroup2/repo.git` -/// -/// ## Examples -/// -/// ``` -/// use git_url_parse::GitUrl; -/// use git_url_parse::types::provider::GitLabProvider; -/// -/// fn main() -> Result<(), git_url_parse::GitUrlParseError> { -/// // Top-level repository -/// let url1 = GitUrl::parse("https://gitlab.com/gitlab-org/gitlab.git")?; -/// let provider1 : GitLabProvider = url1.provider_info()?; -/// assert_eq!(provider1.owner(), "gitlab-org"); -/// assert_eq!(provider1.repo(), "gitlab"); -/// assert_eq!(provider1.subgroup(), None); -/// assert_eq!(provider1.fullname(), "gitlab-org/gitlab"); -/// -/// // Repository with subgroups -/// let url2 = GitUrl::parse("https://gitlab.com/owner/group1/group2/project.git")?; -/// let provider2 : GitLabProvider = url2.provider_info()?; -/// assert_eq!(provider2.owner(), "owner"); -/// assert_eq!(provider2.repo(), "project"); -/// assert_eq!(provider2.subgroup(), Some(vec!["group1", "group2"])); -/// assert_eq!(provider2.fullname(), "owner/group1/group2/project"); -/// -/// Ok(()) -/// } -/// ``` -/// -#[derive(Clone, Debug, PartialEq, Eq, Default, Getters, CloneGetters)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct GitLabProvider { - /// Repo owner - #[getset(get = "pub")] - owner: String, - /// Gitlab subgroups - //#[getset(get_clone = "pub")] - subgroup: Option>, - /// Repo name - #[getset(get = "pub")] - repo: String, -} - -impl GitLabProvider { - /// Repo owner - /// Gitlab subgroups - pub fn subgroup(&self) -> Option> { - if let Some(s) = &self.subgroup { - let subgroup_vec: Vec<&str> = s.iter().map(|s| s.as_str()).collect(); - Some(subgroup_vec) - } else { - None - } - } - - /// Helper method to get the full name of a repo: `{owner}/{repo}` or `{owner}/{subgroups}/{repo}` - pub fn fullname(&self) -> String { - if let Some(subgroup) = self.subgroup() { - let subgroup_str = subgroup.join("/"); - - format!("{}/{subgroup_str}/{}", self.owner, self.repo) - } else { - format!("{}/{}", self.owner, self.repo) - } - } - - /// Parse the path of url for GitLab patterns - fn parse_path(input: &str) -> Result<(&str, GitLabProvider), GitUrlParseError> { - // Optional leading slash - let (input, _) = opt(tag("/")).parse(input)?; - - // Remove .git extension if present - let input = input.trim_end_matches(".git"); - - // Split the path - let parts: Vec<&str> = input.split('/').filter(|s| !s.is_empty()).collect(); - - // Ensure we have at least 2 parts (owner and repo) - if parts.len() < 2 { - return Err(GitUrlParseError::ProviderParseFail( - "Path needs at least 2 parts: ex. \'/owner/repo\'".into(), - )); - } - - // Last part is the repo - let repo = parts[parts.len() - 1].to_string(); - - // Everything before the last part is the owner/subgroups - let (owner, subgroup) = if parts.len() > 2 { - let subgroup: Vec = parts[1..(parts.len() - 1)] - .iter() - .copied() - .map(|s| s.to_string()) - .collect(); - - (parts[0].to_string(), Some(subgroup)) - } else { - (parts[0].to_string(), None) - }; - - Ok(( - input, - GitLabProvider { - owner, - subgroup, - repo, - }, - )) - } -} - -impl GitProvider for GitLabProvider { - fn from_git_url(url: &GitUrl) -> Result { - let path = url.path(); - Self::parse_path(path).map(|(_, provider)| provider) - } -} From 0ae243b32e9c096a742d8803edafb443bd72ffb2 Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Thu, 18 Sep 2025 09:52:55 -0700 Subject: [PATCH 2/3] Add interchange support with url::Url --- src/lib.rs | 13 +- src/types/mod.rs | 94 +++++----- src/types/provider/azure_devops.rs | 7 +- src/types/provider/mod.rs | 13 +- tests/url_interop.rs | 276 +++++++++++++++++++++++++++++ 5 files changed, 344 insertions(+), 59 deletions(-) create mode 100644 tests/url_interop.rs diff --git a/src/lib.rs b/src/lib.rs index c981c92..0f893c8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,9 +15,9 @@ //! - 🏗️ Host provider info extraction //! - Easy to implement trait [`GitProvider`](crate::types::provider::GitProvider) for custom provider parsing //! - Built-in support for multiple Git hosting providers -//! * [Generic](crate::types::provider::GenericProvider) (`git@host:owner/repo.git` style urls) -//! * [GitLab](crate::types::provider::GitLabProvider) -//! * [Azure DevOps](crate::types::provider::AzureDevOpsProvider) +//! * [Generic](crate::types::provider::generic::GenericProvider) (`git@host:owner/repo.git` style urls) +//! * [GitLab](crate::types::provider::gitlab::GitLabProvider) +//! * [Azure DevOps](crate::types::provider::azure_devops::AzureDevOpsProvider) //! //! ## Quick Example //! @@ -90,7 +90,12 @@ //! #### `url` //! (**enabled by default**) //! -//! Uses [url](https://docs.rs/url/latest/) during parsing for full url validation +//! `GitUrl` parsing finishes with [url](https://docs.rs/url/latest/) during parsing for full url validation +//! +//! [`GitUrl::parse_to_url`] will normalize an ssh-based url and return [`url::Url`](https://docs.rs/url/latest/url/struct.Url.html) +//! +//! You can use `url::Url` with the built-in [`GitProvider`](crate::types::provider::GitProvider) host parsers. See the `url_interop` tests for examples +//! //! pub mod types; diff --git a/src/types/mod.rs b/src/types/mod.rs index 87889c2..70c60e3 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -67,15 +67,6 @@ pub struct GitUrl { hint: GitUrlParseHint, } -/// Build the printable GitUrl from its components -impl fmt::Display for GitUrl { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let git_url_str = self.display(); - - write!(f, "{git_url_str}",) - } -} - impl GitUrl { /// scheme name (i.e. `scheme://`) pub fn scheme(&self) -> Option<&str> { @@ -130,7 +121,7 @@ impl GitUrl { } /// This method rebuilds the printable GitUrl from its components. - /// `url_compat` results in output that can be parsed by the `url` crate + /// `url_compat` results in output that can be parsed by the [`url`](https://docs.rs/url/latest/url/) crate fn build_string(&self, url_compat: bool) -> String { let scheme = if self.print_scheme() || url_compat { if let Some(scheme) = self.scheme() { @@ -176,45 +167,7 @@ impl GitUrl { let git_url_str = format!("{scheme}{auth_info}{host}{port}{path}"); git_url_str } -} - -#[cfg(feature = "url")] -impl TryFrom<&GitUrl> for Url { - type Error = url::ParseError; - fn try_from(value: &GitUrl) -> Result { - // Since we don't fully implement any spec, we'll rely on the url crate - Url::parse(&value.url_compat_display()) - } -} - -#[cfg(feature = "url")] -impl TryFrom for Url { - type Error = url::ParseError; - fn try_from(value: GitUrl) -> Result { - // Since we don't fully implement any spec, we'll rely on the url crate - Url::parse(&value.url_compat_display()) - } -} - -#[cfg(feature = "url")] -impl TryFrom<&Url> for GitUrl { - type Error = GitUrlParseError; - fn try_from(value: &Url) -> Result { - // Since we don't fully implement any spec, we'll rely on the url crate - GitUrl::parse(value.as_str()) - } -} - -#[cfg(feature = "url")] -impl TryFrom for GitUrl { - type Error = GitUrlParseError; - fn try_from(value: Url) -> Result { - // Since we don't fully implement any spec, we'll rely on the url crate - GitUrl::parse(value.as_str()) - } -} -impl GitUrl { /// Returns `GitUrl` after removing all user info values pub fn trim_auth(&self) -> GitUrl { let mut new_giturl = self.clone(); @@ -244,7 +197,7 @@ impl GitUrl { Ok(git_url) } - /// Internal parse to `GitUrl` without further validation + /// Internal parse to `GitUrl` without validation steps fn parse_to_git_url(input: &str) -> Result { let mut git_url_result = GitUrl::default(); // Error if there are null bytes within the url @@ -414,3 +367,46 @@ impl GitUrl { Ok(()) } } + +/// Build the printable GitUrl from its components +impl fmt::Display for GitUrl { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let git_url_str = self.display(); + + write!(f, "{git_url_str}",) + } +} + +#[cfg(feature = "url")] +impl TryFrom<&GitUrl> for Url { + type Error = url::ParseError; + fn try_from(value: &GitUrl) -> Result { + // Since we don't fully implement any spec, we'll rely on the url crate + Url::parse(&value.url_compat_display()) + } +} + +#[cfg(feature = "url")] +impl TryFrom for Url { + type Error = url::ParseError; + fn try_from(value: GitUrl) -> Result { + // Since we don't fully implement any spec, we'll rely on the url crate + Url::parse(&value.url_compat_display()) + } +} + +#[cfg(feature = "url")] +impl TryFrom<&Url> for GitUrl { + type Error = GitUrlParseError; + fn try_from(value: &Url) -> Result { + GitUrl::parse(value.as_str()) + } +} + +#[cfg(feature = "url")] +impl TryFrom for GitUrl { + type Error = GitUrlParseError; + fn try_from(value: Url) -> Result { + GitUrl::parse(value.as_str()) + } +} diff --git a/src/types/provider/azure_devops.rs b/src/types/provider/azure_devops.rs index d818d6a..fa3667a 100644 --- a/src/types/provider/azure_devops.rs +++ b/src/types/provider/azure_devops.rs @@ -4,6 +4,7 @@ use crate::{GitUrl, GitUrlParseError}; use getset::Getters; use nom::Parser; +use nom::branch::alt; use nom::bytes::complete::{is_not, tag, take_until}; use nom::combinator::opt; use nom::sequence::{preceded, separated_pair, terminated}; @@ -82,8 +83,10 @@ impl AzureDevOpsProvider { /// Parse the path of an ssh url for Azure Devops patterns fn parse_ssh_path(input: &str) -> Result<(&str, AzureDevOpsProvider), GitUrlParseError> { - // Handle optional leading v3/ or other prefix - let (input, _) = opt(take_until("/")).parse(input)?; + // Handle optional leading /v3/ or v3/ prefix + let (input, _) = + opt(alt((preceded(tag("/"), tag("v3/")), take_until("/")))).parse(input)?; + let (input, _) = opt(tag("/")).parse(input)?; // Parse org/project/repo diff --git a/src/types/provider/mod.rs b/src/types/provider/mod.rs index c130631..36fc8fd 100644 --- a/src/types/provider/mod.rs +++ b/src/types/provider/mod.rs @@ -9,15 +9,18 @@ //! - [GitLab](crate::types::provider::GitLabProvider) //! - Custom (via [`GitProvider`] trait) -mod azure_devops; -mod generic; -mod gitlab; +/// Azure DevOps git host +pub mod azure_devops; +/// Generic git host +pub mod generic; +/// GitLab git host +pub mod gitlab; pub use azure_devops::AzureDevOpsProvider; pub use generic::GenericProvider; pub use gitlab::GitLabProvider; -/// Secondary parser called by [`GitUrl::provider_info()`] to extract Git host provider info from url +/// Secondary parser called by [`crate::GitUrl::provider_info()`] to extract Git host provider info from url /// /// ``` /// // Custom trait example @@ -43,6 +46,8 @@ pub use gitlab::GitLabProvider; /// let expected = MyCustomProvider; /// assert_eq!(provider_info, expected) /// ``` +/// +/// With `feature = url`, there is support for parsing Git host provider info from [`url::Url`](https://docs.rs/url/latest/url/struct.Url.html) pub trait GitProvider: Clone + std::fmt::Debug { /// Trait method called by `GitUrl::provider_info()` /// diff --git a/tests/url_interop.rs b/tests/url_interop.rs new file mode 100644 index 0000000..c5157f3 --- /dev/null +++ b/tests/url_interop.rs @@ -0,0 +1,276 @@ +use git_url_parse::GitUrl; +use git_url_parse::types::provider::{ + AzureDevOpsProvider, GenericProvider, GitLabProvider, GitProvider, +}; + +use log::debug; +#[cfg(feature = "url")] +use url::Url; + +#[cfg(feature = "url")] +#[test] +fn try_from_url_ssh_git() { + let _ = env_logger::try_init(); + let input = "git@host.tld:user/project-name.git"; + let expected = "ssh://git@host.tld/user/project-name.git"; + let parsed = GitUrl::parse(input).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let convert = Url::try_from(parsed).unwrap(); + debug!("{:#?}", convert); + assert_eq!(convert.as_str(), expected); +} + +#[cfg(feature = "url")] +#[test] +fn parse_to_url_ssh_git() { + let _ = env_logger::try_init(); + let input = "git@host.tld:user/project-name.git"; + let expected = "ssh://git@host.tld/user/project-name.git"; + let parsed = GitUrl::parse(input).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let direct = GitUrl::parse_to_url(input).unwrap(); + debug!("{:#?}", direct); + assert_eq!(direct.as_str(), expected); +} + +#[cfg(feature = "url")] +#[test] +fn https_user_github() { + let _ = env_logger::try_init(); + let test_url = "https://user@github.com/user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let convert = Url::try_from(parsed).unwrap(); + debug!("{:#?}", convert); + assert_eq!(convert.as_str(), test_url); + + let direct = GitUrl::parse_to_url(test_url).unwrap(); + debug!("{:#?}", direct); + assert_eq!(direct.as_str(), test_url); +} + +#[cfg(feature = "url")] +#[test] +fn ssh_user_github() { + let _ = env_logger::try_init(); + let test_url = "git@github.com:user/repo.git"; + let expected_url = "ssh://git@github.com/user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let convert = Url::try_from(parsed).unwrap(); + debug!("{:#?}", convert); + assert_eq!(convert.as_str(), expected_url); + + let direct = GitUrl::parse_to_url(test_url).unwrap(); + debug!("{:#?}", direct); + assert_eq!(direct.as_str(), expected_url); +} + +#[cfg(feature = "url")] +#[test] +fn url_relative_unix_path() { + let _ = env_logger::try_init(); + let test_url = "../project-name.git"; + let expected = "file://../project-name.git"; + let parsed = GitUrl::parse_to_url(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + assert_eq!(parsed.as_str(), expected) +} + +#[cfg(feature = "url")] +#[test] +fn self_host() { + let _ = env_logger::try_init(); + let test_url = "http://git.example.com:3000/user/repo.git"; + let parsed = GitUrl::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info: GenericProvider = parsed.provider_info().unwrap(); + debug!("{:#?}", provider_info); + + let owner = "user"; + let repo = "repo"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +// Providers +#[cfg(feature = "url")] +#[test] +fn url_http_generic_git() { + let _ = env_logger::try_init(); + let test_url = "https://github.com/tjtelan/git-url-parse-rs.git"; + let parsed = Url::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info: GenericProvider = GenericProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let owner = "tjtelan"; + let repo = "git-url-parse-rs"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_self_host() { + let _ = env_logger::try_init(); + let test_url = "http://git.example.com:3000/user/repo.git"; + let parsed = Url::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info = GenericProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let owner = "user"; + let repo = "repo"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_http_azure_devops() { + let _ = env_logger::try_init(); + let test_url = "https://CompanyName@dev.azure.com/CompanyName/ProjectName/_git/RepoName"; + let parsed = Url::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info = AzureDevOpsProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let org = "CompanyName"; + let project = "ProjectName"; + let repo = "RepoName"; + let full = format!("{org}/{project}/{repo}"); + + assert_eq!(provider_info.org(), org); + assert_eq!(provider_info.project(), project); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_ssh_azure_devops() { + let _ = env_logger::try_init(); + let test_url = "git@ssh.dev.azure.com:v3/CompanyName/ProjectName/RepoName.git"; + let parsed = GitUrl::parse_to_url(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info = AzureDevOpsProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let org = "CompanyName"; + let project = "ProjectName"; + let repo = "RepoName"; + let full = format!("{org}/{project}/{repo}"); + + assert_eq!(provider_info.org(), org); + assert_eq!(provider_info.project(), project); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_http_gitlab() { + let _ = env_logger::try_init(); + let test_url = "https://gitlab.com/gitlab-org/gitlab.git"; + let parsed = Url::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info = GitLabProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = None; + let repo = "gitlab"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_ssh_gitlab() { + let _ = env_logger::try_init(); + let test_url = "git@gitlab.com:gitlab-org/gitlab.git"; + let parsed = GitUrl::parse_to_url(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info = GitLabProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = None; + let repo = "gitlab"; + let full = format!("{owner}/{repo}"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_http_gitlab_subgroups() { + let _ = env_logger::try_init(); + let test_url = "https://gitlab.com/gitlab-org/sbom/systems/gitlab-core.git"; + let parsed = Url::parse(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info: GitLabProvider = GitLabProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = Some(vec!["sbom", "systems"]); + let repo = "gitlab-core"; + let full = format!("{owner}/{}/{repo}", "sbom/systems"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} + +#[cfg(feature = "url")] +#[test] +fn url_ssh_gitlab_subgroups() { + let _ = env_logger::try_init(); + let test_url = "git@gitlab.com:gitlab-org/sbom/systems/gitlab-core.git"; + let parsed = GitUrl::parse_to_url(test_url).expect("URL parse failed"); + debug!("{:#?}", parsed); + + let provider_info: GitLabProvider = GitLabProvider::from_git_url(&parsed).unwrap(); + debug!("{:#?}", provider_info); + + let owner = "gitlab-org"; + let subgroup = Some(vec!["sbom", "systems"]); + let repo = "gitlab-core"; + let full = format!("{owner}/{}/{repo}", "sbom/systems"); + + assert_eq!(provider_info.owner(), owner); + assert_eq!(provider_info.subgroup(), subgroup); + assert_eq!(provider_info.repo(), repo); + assert_eq!(provider_info.fullname(), full); +} From 969d582f471995e2a7a3d55d3410752f7079014c Mon Sep 17 00:00:00 2001 From: "T.J. Telan" Date: Thu, 18 Sep 2025 10:07:45 -0700 Subject: [PATCH 3/3] Add doctest for parse_to_url --- src/types/mod.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/types/mod.rs b/src/types/mod.rs index 70c60e3..6741693 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -274,6 +274,23 @@ impl GitUrl { } /// Normalize input into form that can be used by [`Url::parse`](https://docs.rs/url/latest/url/struct.Url.html#method.parse) + /// + /// ``` + /// use git_url_parse::GitUrl; + /// #[cfg(feature = "url")] + /// use url::Url; + /// + /// fn main() -> Result<(), git_url_parse::GitUrlParseError> { + /// let ssh_url = GitUrl::parse_to_url("git@github.com:tjtelan/git-url-parse-rs.git")?; + /// + /// assert_eq!(ssh_url.scheme(), "ssh"); + /// assert_eq!(ssh_url.username(), "git"); + /// assert_eq!(ssh_url.host_str(), Some("github.com")); + /// assert_eq!(ssh_url.path(), "/tjtelan/git-url-parse-rs.git"); + /// Ok(()) + /// } + /// ``` + /// #[cfg(feature = "url")] pub fn parse_to_url(input: &str) -> Result { let git_url = Self::parse_to_git_url(input)?;