1- # ' Parse and build URLs
1+ # ' Parse a URL into its component pieces
22# '
3- # ' `url_parse()` parses a URL into its component pieces; `url_build()` does
4- # ' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)`
5- # ' for the details of the parsing algorithm .
3+ # ' `url_parse()` parses a URL into its component parts, powered by
4+ # ' [curl::curl_parse_url()]. The parsing algorithm follows the specifications
5+ # ' detailed in `r rfc(3986)`.
66# '
7- # ' @param url For `url_parse()` a string to parse into a URL;
8- # ' for `url_build()` a URL to turn back into a string.
7+ # ' @param url A string containing the URL to parse.
98# ' @param base_url Use this as a parent, if `url` is a relative URL.
10- # ' @returns
11- # ' * `url_build()` returns a string.
12- # ' * `url_parse()` returns a URL: a S3 list with class `httr2_url`
13- # ' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`,
14- # ' `username`, `password`.
9+ # ' @returns An S3 object of class `httr2_url` with the following components:
10+ # ' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and
11+ # ' `fragment`.
1512# ' @export
13+ # ' @family URL manipulation
1614# ' @examples
1715# ' url_parse("http://google.com/")
1816# ' url_parse("http://google.com:80/")
2220# ' # You can parse a relative URL if you also provide a base url
2321# ' url_parse("foo", "http://google.com/bar/")
2422# ' url_parse("..", "http://google.com/bar/")
25- # '
26- # ' url <- url_parse("http://google.com/")
27- # ' url$port <- 80
28- # ' url$hostname <- "example.com"
29- # ' url$query <- list(a = 1, b = 2, c = 3)
30- # ' url_build(url)
3123url_parse <- function (url , base_url = NULL ) {
3224 check_string(url )
3325 check_string(base_url , allow_null = TRUE )
@@ -48,10 +40,88 @@ url_parse <- function(url, base_url = NULL) {
4840 parsed
4941}
5042
#' Modify a URL
#'
#' Modify components of a URL. The default value of each argument, `NULL`,
#' means leave the component as is. If you want to remove a component,
#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will
#' create a relative URL.
#'
#' @param url A string or [parsed URL](url_parse).
#' @param scheme The scheme, typically either `http` or `https`.
#' @param hostname The hostname, e.g., `www.google.com` or `posit.co`.
#' @param username,password Username and password to embed in the URL.
#'   Not generally recommended but needed for some legacy applications.
#' @param port An integer port number, or `""` to remove the port.
#' @param path The path, e.g., `/search`. Paths must start with `/`, so this
#'   will be automatically added if omitted.
#' @param query Either a query string or a named list of query components.
#' @param fragment The fragment, e.g., `#section-1`.
#' @return An object of the same type as `url`.
#' @export
#' @family URL manipulation
#' @examples
#' url_modify("http://hadley.nz", path = "about")
#' url_modify("http://hadley.nz", scheme = "https")
#' url_modify("http://hadley.nz/abc", path = "/cde")
#' url_modify("http://hadley.nz/abc", path = "")
#' url_modify("http://hadley.nz?a=1", query = "b=2")
#' url_modify("http://hadley.nz?a=1", query = list(c = 3))
url_modify <- function(url,
                       scheme = NULL,
                       hostname = NULL,
                       username = NULL,
                       password = NULL,
                       port = NULL,
                       path = NULL,
                       query = NULL,
                       fragment = NULL) {
  if (!is_string(url) && !is_url(url)) {
    stop_input_type(url, "a string or parsed URL")
  }
  # Remember the input form so the result is returned in the same form.
  string_url <- is_string(url)
  if (string_url) {
    url <- url_parse(url)
  }

  check_string(scheme, allow_null = TRUE)
  check_string(hostname, allow_null = TRUE)
  check_string(username, allow_null = TRUE)
  check_string(password, allow_null = TRUE)
  # `""` is the documented "remove this component" sentinel, so let it
  # through for `port` even though it is not a whole number.
  if (!identical(port, "")) {
    check_number_whole(port, min = 1, allow_null = TRUE)
  }
  check_string(path, allow_null = TRUE)
  check_string(fragment, allow_null = TRUE)

  if (is_string(query)) {
    # Keep `""` untouched so it is handled by the removal step below rather
    # than depending on what `query_parse("")` happens to return.
    if (!identical(query, "")) {
      query <- query_parse(query)
    }
  } else if (is.list(query) && (is_named(query) || length(query) == 0)) {
    for (nm in names(query)) {
      check_query_param(query[[nm]], paste0("query$", nm))
    }
  } else if (!is.null(query)) {
    stop_input_type(query, "a character vector, named list, or NULL")
  }

  # Only the components that were actually supplied take part in the update.
  new <- compact(list(
    scheme = scheme,
    hostname = hostname,
    username = username,
    password = password,
    port = port,
    path = path,
    query = query,
    fragment = fragment
  ))
  # An empty string means "remove": store NULL for that component.
  is_empty <- map_lgl(new, identical, "")
  new[is_empty] <- list(NULL)
  url[names(new)] <- new

  if (string_url) {
    url_build(url)
  } else {
    url
  }
}
56126
# Predicate: does `x` inherit from the S3 class `httr2_url`, the class
# documented as the return type of `url_parse()`?
is_url <- function(x) {
  inherits(x, "httr2_url")
}
@@ -91,9 +161,19 @@ print.httr2_url <- function(x, ...) {
91161 invisible (x )
92162}
93163
164+ # ' Build a string from a URL object
165+ # '
166+ # ' This is the inverse of [url_parse()], taking a parsed URL object and
167+ # ' turning it back into a string.
168+ # '
169+ # ' @param url A URL object created by [url_parse()].
170+ # ' @family URL manipulation
94171# ' @export
95- # ' @rdname url_parse
96172url_build <- function (url ) {
173+ if (! is_url(url )) {
174+ stop_input_type(url , " a parsed URL" )
175+ }
176+
97177 if (! is.null(url $ query )) {
98178 query <- query_build(url $ query )
99179 } else {
@@ -119,7 +199,7 @@ url_build <- function(url) {
119199 authority <- NULL
120200 }
121201
122- if (! is.null(url $ path ) && ! startsWith(url $ path , " /" )) {
202+ if (is.null(url $ path ) || ! startsWith(url $ path , " /" )) {
123203 url $ path <- paste0(" /" , url $ path )
124204 }
125205
0 commit comments