Skip to content

Commit e6f425e

Browse files
authored
Improve url manipulation tooling (#611)
* Check inputs and export `url_modify()`. Fixes #464. * Check inputs to `url_build()`. Fixes #482.
1 parent e58e86c commit e6f425e

File tree

9 files changed

+325
-53
lines changed

9 files changed

+325
-53
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ export(secret_write_rds)
144144
export(signal_total_pages)
145145
export(throttle_status)
146146
export(url_build)
147+
export(url_modify)
147148
export(url_parse)
148149
export(with_mock)
149150
export(with_mocked_responses)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# httr2 (development version)
22

3+
* New `url_modify()` makes it easier to modify an existing url (#464).
34
* New `req_url_relative()` for constructing relative urls (#449).
45
* `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449).
56
* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577).

R/url.R

Lines changed: 103 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
1-
#' Parse and build URLs
1+
#' Parse a URL into its component pieces
22
#'
3-
#' `url_parse()` parses a URL into its component pieces; `url_build()` does
4-
#' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)`
5-
#' for the details of the parsing algorithm.
3+
#' `url_parse()` parses a URL into its component parts, powered by
4+
#' [curl::curl_parse_url()]. The parsing algorithm follows the specifications
5+
#' detailed in `r rfc(3986)`.
66
#'
7-
#' @param url For `url_parse()` a string to parse into a URL;
8-
#' for `url_build()` a URL to turn back into a string.
7+
#' @param url A string containing the URL to parse.
98
#' @param base_url Use this as a parent, if `url` is a relative URL.
10-
#' @returns
11-
#' * `url_build()` returns a string.
12-
#' * `url_parse()` returns a URL: a S3 list with class `httr2_url`
13-
#' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`,
14-
#' `username`, `password`.
9+
#' @returns An S3 object of class `httr2_url` with the following components:
10+
#' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and
11+
#' `fragment`.
1512
#' @export
13+
#' @family URL manipulation
1614
#' @examples
1715
#' url_parse("http://google.com/")
1816
#' url_parse("http://google.com:80/")
@@ -22,12 +20,6 @@
2220
#' # You can parse a relative URL if you also provide a base url
2321
#' url_parse("foo", "http://google.com/bar/")
2422
#' url_parse("..", "http://google.com/bar/")
25-
#'
26-
#' url <- url_parse("http://google.com/")
27-
#' url$port <- 80
28-
#' url$hostname <- "example.com"
29-
#' url$query <- list(a = 1, b = 2, c = 3)
30-
#' url_build(url)
3123
url_parse <- function(url, base_url = NULL) {
3224
check_string(url)
3325
check_string(base_url, allow_null = TRUE)
@@ -48,10 +40,88 @@ url_parse <- function(url, base_url = NULL) {
4840
parsed
4941
}
5042

51-
url_modify <- function(url, ..., error_call = caller_env()) {
52-
url <- url_parse(url)
53-
url <- modify_list(url, ..., error_call = error_call)
54-
url_build(url)
43+
#' Modify a URL
44+
#'
45+
#' Modify components of a URL. The default value of each argument, `NULL`,
46+
#' means leave the component as is. If you want to remove a component,
47+
#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will
48+
#' create a relative URL.
49+
#'
50+
#' @param url A string or [parsed URL][url_parse()].
51+
#' @param scheme The scheme, typically either `http` or `https`.
52+
#' @param hostname The hostname, e.g., `www.google.com` or `posit.co`.
53+
#' @param username,password Username and password to embed in the URL.
54+
#' Not generally recommended but needed for some legacy applications.
55+
#' @param port An integer port number.
56+
#' @param path The path, e.g., `/search`. Paths must start with `/`, so this
57+
#' will be automatically added if omitted.
58+
#' @param query Either a query string or a named list of query components.
59+
#' @param fragment The fragment, e.g., `#section-1`.
60+
#' @return An object of the same type as `url`.
61+
#' @export
62+
#' @family URL manipulation
63+
#' @examples
64+
#' url_modify("http://hadley.nz", path = "about")
65+
#' url_modify("http://hadley.nz", scheme = "https")
66+
#' url_modify("http://hadley.nz/abc", path = "/cde")
67+
#' url_modify("http://hadley.nz/abc", path = "")
68+
#' url_modify("http://hadley.nz?a=1", query = "b=2")
69+
#' url_modify("http://hadley.nz?a=1", query = list(c = 3))
70+
url_modify <- function(url,
71+
scheme = NULL,
72+
hostname = NULL,
73+
username = NULL,
74+
password = NULL,
75+
port = NULL,
76+
path = NULL,
77+
query = NULL,
78+
fragment = NULL) {
79+
80+
if (!is_string(url) && !is_url(url)) {
81+
stop_input_type(url, "a string or parsed URL")
82+
}
83+
string_url <- is_string(url)
84+
if (string_url) {
85+
url <- url_parse(url)
86+
}
87+
88+
check_string(scheme, allow_null = TRUE)
89+
check_string(hostname, allow_null = TRUE)
90+
check_string(username, allow_null = TRUE)
91+
check_string(password, allow_null = TRUE)
92+
check_number_whole(port, min = 1, allow_null = TRUE)
93+
check_string(path, allow_null = TRUE)
94+
check_string(fragment, allow_null = TRUE)
95+
96+
if (is_string(query)) {
97+
query <- query_parse(query)
98+
} else if (is.list(query) && (is_named(query) || length(query) == 0)) {
99+
for (nm in names(query)) {
100+
check_query_param(query[[nm]], paste0("query$", nm))
101+
}
102+
} else if (!is.null(query)) {
103+
stop_input_type(query, "a character vector, named list, or NULL")
104+
}
105+
106+
new <- compact(list(
107+
scheme = scheme,
108+
hostname = hostname,
109+
username = username,
110+
password = password,
111+
port = port,
112+
path = path,
113+
query = query,
114+
fragment = fragment
115+
))
116+
is_empty <- map_lgl(new, identical, "")
117+
new[is_empty] <- list(NULL)
118+
url[names(new)] <- new
119+
120+
if (string_url) {
121+
url_build(url)
122+
} else {
123+
url
124+
}
55125
}
56126

57127
is_url <- function(x) inherits(x, "httr2_url")
@@ -91,9 +161,19 @@ print.httr2_url <- function(x, ...) {
91161
invisible(x)
92162
}
93163

164+
#' Build a string from a URL object
165+
#'
166+
#' This is the inverse of [url_parse()], taking a parsed URL object and
167+
#' turning it back into a string.
168+
#'
169+
#' @param url A URL object created by [url_parse()].
170+
#' @family URL manipulation
94171
#' @export
95-
#' @rdname url_parse
96172
url_build <- function(url) {
173+
if (!is_url(url)) {
174+
stop_input_type(url, "a parsed URL")
175+
}
176+
97177
if (!is.null(url$query)) {
98178
query <- query_build(url$query)
99179
} else {
@@ -119,7 +199,7 @@ url_build <- function(url) {
119199
authority <- NULL
120200
}
121201

122-
if (!is.null(url$path) && !startsWith(url$path, "/")) {
202+
if (is.null(url$path) || !startsWith(url$path, "/")) {
123203
url$path <- paste0("/", url$path)
124204
}
125205

_pkgdown.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,15 @@ reference:
7070
contents:
7171
- starts_with("resp_")
7272

73+
- title: URL manipulation
74+
contents:
75+
- starts_with("url_")
76+
7377
- title: Miscellaneous helpers
7478
contents:
7579
- curl_translate
7680
- secrets
7781
- obfuscate
78-
- url_parse
7982

8083
- title: OAuth
8184
desc: >

man/url_build.Rd

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/url_modify.Rd

Lines changed: 60 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/url_parse.Rd

Lines changed: 14 additions & 21 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)