Skip to content

Commit 67fd213

Browse files
committed
refactor: no regexp-based Reference::try_from
Signed-off-by: David Sánchez <davidslt+git@pm.me>
1 parent 3047d16 commit 67fd213

File tree

1 file changed

+82
-60
lines changed

1 file changed

+82
-60
lines changed

src/distribution/reference.rs

Lines changed: 82 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
use std::fmt;
22
use std::str::FromStr;
3-
use std::{convert::TryFrom, sync::OnceLock};
43

5-
use regex::{Regex, RegexBuilder};
64
use serde::{Deserialize, Serialize};
75
use thiserror::Error;
86

@@ -13,19 +11,6 @@ const DOCKER_HUB_DOMAIN_LEGACY: &str = "index.docker.io";
1311
const DOCKER_HUB_DOMAIN: &str = "docker.io";
1412
const DOCKER_HUB_OFFICIAL_REPO_NAME: &str = "library";
1513
const DEFAULT_TAG: &str = "latest";
16-
/// REFERENCE_REGEXP is the full supported format of a reference. The regexp
17-
/// is anchored and has capturing groups for name, tag, and digest components.
18-
const REFERENCE_REGEXP: &str = r"^((?:(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])(?:(?:\.(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]))+)?(?::[0-9]+)?/)?[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?(?:(?:/[a-z0-9]+(?:(?:(?:[._]|__|[-]*)[a-z0-9]+)+)?)+)?)(?::([\w][\w.-]{0,127}))?(?:@([A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}))?$";
19-
20-
fn reference_regexp() -> &'static Regex {
21-
static RE: OnceLock<Regex> = OnceLock::new();
22-
RE.get_or_init(|| {
23-
RegexBuilder::new(REFERENCE_REGEXP)
24-
.size_limit(10 * (1 << 21))
25-
.build()
26-
.unwrap()
27-
})
28-
}
2914

3015
/// Reasons that parsing a string as a Reference can fail.
3116
#[derive(Debug, Error, PartialEq, Eq)]
@@ -267,52 +252,42 @@ impl TryFrom<&str> for Reference {
267252
if s.is_empty() {
268253
return Err(ParseError::NameEmpty);
269254
}
270-
let captures = match reference_regexp().captures(s) {
271-
Some(caps) => caps,
272-
None => {
273-
return Err(ParseError::ReferenceInvalidFormat);
274-
}
275-
};
276-
let name = &captures[1];
277-
let mut tag = captures.get(2).map(|m| m.as_str().to_owned());
278-
let digest = captures.get(3).map(|m| m.as_str().to_owned());
279-
if tag.is_none() && digest.is_none() {
280-
tag = Some(DEFAULT_TAG.into());
255+
// A bare ':' or '@' prefix has no name component.
256+
if s.starts_with(':') || s.starts_with('@') {
257+
return Err(ParseError::ReferenceInvalidFormat);
281258
}
282-
let (registry, repository) = split_domain(name);
283-
let reference = Reference {
284-
registry,
285-
mirror_registry: None,
286-
repository,
287-
tag,
288-
digest,
259+
260+
// Extract the digest (`@<algo>:<hex>`).
261+
let (name_and_tag, digest) = match s.split_once('@') {
262+
Some((n, d)) => (n, Some(d)),
263+
None => (s, None),
289264
};
290-
if reference.repository().len() > NAME_TOTAL_LENGTH_MAX {
265+
266+
// Extract the tag.
267+
let (name, tag) = split_name_tag(name_and_tag);
268+
269+
// Get registry / repository.
270+
let (registry, repository) = split_domain(name);
271+
272+
// Length check (repository only).
273+
if repository.len() > NAME_TOTAL_LENGTH_MAX {
291274
return Err(ParseError::NameTooLong);
292275
}
293-
// Digests must always be hex-encoded, ensuring that their hex portion will always be
294-
// size*2
295-
if let Some(digest) = reference.digest() {
296-
match digest.split_once(':') {
297-
None => return Err(ParseError::DigestInvalidFormat),
298-
Some(("sha256", digest)) => {
299-
if digest.len() != 64 {
300-
return Err(ParseError::DigestInvalidLength);
301-
}
302-
}
303-
Some(("sha384", digest)) => {
304-
if digest.len() != 96 {
305-
return Err(ParseError::DigestInvalidLength);
306-
}
307-
}
308-
Some(("sha512", digest)) => {
309-
if digest.len() != 128 {
310-
return Err(ParseError::DigestInvalidLength);
311-
}
312-
}
313-
Some((_, _)) => return Err(ParseError::DigestUnsupported),
314-
}
276+
277+
// Character validation.
278+
validate_repository(&repository)?;
279+
if let Some(d) = digest {
280+
validate_digest(d)?;
315281
}
282+
283+
let reference = match (tag, digest) {
284+
(Some(t), Some(d)) => {
285+
Reference::with_tag_and_digest(registry, repository, t.to_owned(), d.to_owned())
286+
}
287+
(Some(t), None) => Reference::with_tag(registry, repository, t.to_owned()),
288+
(None, Some(d)) => Reference::with_digest(registry, repository, d.to_owned()),
289+
(None, None) => Reference::with_tag(registry, repository, DEFAULT_TAG.to_owned()),
290+
};
316291
Ok(reference)
317292
}
318293
}
@@ -365,6 +340,55 @@ fn split_domain(name: &str) -> (String, String) {
365340
(domain, remainder)
366341
}
367342

343+
/// Separates an optional trailing `:tag` from a `name[:tag]` string.
///
/// Only a `:` located after the final `/` (or any `:` when the input has no
/// `/` at all) counts as the tag separator. A colon that precedes a slash, as
/// in `host:port/repo`, stays part of the name.
///
/// Returns the name and, when a separator was found, the text following it.
/// The tag is returned verbatim (it may be empty, e.g. for `repo:`); no
/// validation is performed here.
fn split_name_tag(s: &str) -> (&str, Option<&str>) {
    match s.rsplit_once(':') {
        // Nothing after the last colon is a path separator, so it is a tag.
        Some((name, tag)) if !tag.contains('/') => (name, Some(tag)),
        // Either there is no colon, or the last colon sits before a `/`
        // and therefore belongs to `host:port`.
        _ => (s, None),
    }
}
357+
358+
/// Validate that every path component of the repository contains only `[a-z0-9._-]`.
359+
fn validate_repository(repo: &str) -> Result<(), ParseError> {
360+
repo.split('/').try_for_each(|component| {
361+
if !component.is_empty() {
362+
component.chars().try_for_each(validate_component_char)
363+
} else {
364+
Err(ParseError::ReferenceInvalidFormat)
365+
}
366+
})
367+
}
368+
369+
fn validate_component_char(c: char) -> Result<(), ParseError> {
370+
if c.is_ascii_uppercase() {
371+
Err(ParseError::NameContainsUppercase)
372+
} else if !c.is_ascii_alphanumeric() && c != '.' && c != '_' && c != '-' {
373+
Err(ParseError::ReferenceInvalidFormat)
374+
} else {
375+
Ok(())
376+
}
377+
}
378+
379+
/// Validate a digest string of the form `<algorithm>:<hex>`.
380+
fn validate_digest(digest: &str) -> Result<(), ParseError> {
381+
use ParseError::*;
382+
match digest.split_once(':') {
383+
Some(("sha256", hex)) if hex.len() == 64 => Ok(()),
384+
Some(("sha384", hex)) if hex.len() == 96 => Ok(()),
385+
Some(("sha512", hex)) if hex.len() == 128 => Ok(()),
386+
Some(("sha256", _)) | Some(("sha384", _)) | Some(("sha512", _)) => Err(DigestInvalidLength),
387+
Some(_) => Err(DigestUnsupported),
388+
None => Err(DigestInvalidFormat),
389+
}
390+
}
391+
368392
#[cfg(test)]
369393
mod test {
370394
use super::*;
@@ -418,13 +442,11 @@ mod test {
418442
case("@sha256:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ParseError::ReferenceInvalidFormat),
419443
case("repo@sha256:ffffffffffffffffffffffffffffffffff", ParseError::DigestInvalidLength),
420444
case("validname@invaliddigest:ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", ParseError::DigestUnsupported),
421-
// FIXME: should really pass a ParseError::NameContainsUppercase, but "invalid format" is good enough for now.
422-
case("Uppercase:tag", ParseError::ReferenceInvalidFormat),
445+
case("Uppercase:tag", ParseError::NameContainsUppercase),
423446
// FIXME: "Uppercase" is incorrectly handled as a domain-name here, and therefore passes.
424447
// https://github.com/docker/distribution/blob/master/reference/reference_test.go#L104-L109
425448
// case("Uppercase/lowercase:tag", ParseError::NameContainsUppercase),
426-
// FIXME: should really pass a ParseError::NameContainsUppercase, but "invalid format" is good enough for now.
427-
case("test:5000/Uppercase/lowercase:tag", ParseError::ReferenceInvalidFormat),
449+
case("test:5000/Uppercase/lowercase:tag", ParseError::NameContainsUppercase),
428450
case("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ParseError::NameTooLong),
429451
case("aa/asdf$$^/aa", ParseError::ReferenceInvalidFormat)
430452
)]

0 commit comments

Comments
 (0)