Skip to content

Commit 638794c

Browse files
peff authored and gitster committed
make url-related functions reusable
The is_url function and url percent-decoding functions were static, but are generally useful. Let's make them available to other parts of the code.

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 770c541 commit 638794c

File tree

5 files changed

+134
-105
lines changed

5 files changed

+134
-105
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ LIB_OBJS += tree-diff.o
627627
LIB_OBJS += tree.o
628628
LIB_OBJS += tree-walk.o
629629
LIB_OBJS += unpack-trees.o
630+
LIB_OBJS += url.o
630631
LIB_OBJS += usage.o
631632
LIB_OBJS += userdiff.o
632633
LIB_OBJS += utf8.o

http-backend.c

Lines changed: 3 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "exec_cmd.h"
77
#include "run-command.h"
88
#include "string-list.h"
9+
#include "url.h"
910

1011
static const char content_type[] = "Content-Type";
1112
static const char content_length[] = "Content-Length";
@@ -25,69 +26,15 @@ static struct rpc_service rpc_service[] = {
2526
{ "receive-pack", "receivepack", -1 },
2627
};
2728

28-
static int decode_char(const char *q)
29-
{
30-
int i;
31-
unsigned char val = 0;
32-
for (i = 0; i < 2; i++) {
33-
unsigned char c = *q++;
34-
val <<= 4;
35-
if (c >= '0' && c <= '9')
36-
val += c - '0';
37-
else if (c >= 'a' && c <= 'f')
38-
val += c - 'a' + 10;
39-
else if (c >= 'A' && c <= 'F')
40-
val += c - 'A' + 10;
41-
else
42-
return -1;
43-
}
44-
return val;
45-
}
46-
47-
static char *decode_parameter(const char **query, int is_name)
48-
{
49-
const char *q = *query;
50-
struct strbuf out;
51-
52-
strbuf_init(&out, 16);
53-
do {
54-
unsigned char c = *q;
55-
56-
if (!c)
57-
break;
58-
if (c == '&' || (is_name && c == '=')) {
59-
q++;
60-
break;
61-
}
62-
63-
if (c == '%') {
64-
int val = decode_char(q + 1);
65-
if (0 <= val) {
66-
strbuf_addch(&out, val);
67-
q += 3;
68-
continue;
69-
}
70-
}
71-
72-
if (c == '+')
73-
strbuf_addch(&out, ' ');
74-
else
75-
strbuf_addch(&out, c);
76-
q++;
77-
} while (1);
78-
*query = q;
79-
return strbuf_detach(&out, NULL);
80-
}
81-
8229
static struct string_list *get_parameters(void)
8330
{
8431
if (!query_params) {
8532
const char *query = getenv("QUERY_STRING");
8633

8734
query_params = xcalloc(1, sizeof(*query_params));
8835
while (query && *query) {
89-
char *name = decode_parameter(&query, 1);
90-
char *value = decode_parameter(&query, 0);
36+
char *name = url_decode_parameter_name(&query);
37+
char *value = url_decode_parameter_value(&query);
9138
struct string_list_item *i;
9239

9340
i = string_list_lookup(name, query_params);

transport.c

Lines changed: 2 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "dir.h"
1010
#include "refs.h"
1111
#include "branch.h"
12+
#include "url.h"
1213

1314
/* rsync support */
1415

@@ -871,54 +872,6 @@ static int is_file(const char *url)
871872
return S_ISREG(buf.st_mode);
872873
}
873874

874-
static int isurlschemechar(int first_flag, int ch)
875-
{
876-
/*
877-
* The set of valid URL schemes, as per STD66 (RFC3986) is
878-
* '[A-Za-z][A-Za-z0-9+.-]*'. But use slightly looser check
879-
* of '[A-Za-z0-9][A-Za-z0-9+.-]*' because earlier version
880-
* of check used '[A-Za-z0-9]+' so not to break any remote
881-
* helpers.
882-
*/
883-
int alphanumeric, special;
884-
alphanumeric = ch > 0 && isalnum(ch);
885-
special = ch == '+' || ch == '-' || ch == '.';
886-
return alphanumeric || (!first_flag && special);
887-
}
888-
889-
static int is_url(const char *url)
890-
{
891-
const char *url2, *first_slash;
892-
893-
if (!url)
894-
return 0;
895-
url2 = url;
896-
first_slash = strchr(url, '/');
897-
898-
/* Input with no slash at all or slash first can't be URL. */
899-
if (!first_slash || first_slash == url)
900-
return 0;
901-
/* Character before must be : and next must be /. */
902-
if (first_slash[-1] != ':' || first_slash[1] != '/')
903-
return 0;
904-
/* There must be something before the :// */
905-
if (first_slash == url + 1)
906-
return 0;
907-
/*
908-
* Check all characters up to first slash - 1. Only alphanum
909-
* is allowed.
910-
*/
911-
url2 = url;
912-
while (url2 < first_slash - 1) {
913-
if (!isurlschemechar(url2 == url, (unsigned char)*url2))
914-
return 0;
915-
url2++;
916-
}
917-
918-
/* Valid enough. */
919-
return 1;
920-
}
921-
922875
static int external_specification_len(const char *url)
923876
{
924877
return strchr(url, ':') - url;
@@ -946,7 +899,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
946899
if (url) {
947900
const char *p = url;
948901

949-
while (isurlschemechar(p == url, *p))
902+
while (is_urlschemechar(p == url, *p))
950903
p++;
951904
if (!prefixcmp(p, "::"))
952905
helper = xstrndup(url, p - url);

url.c

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#include "cache.h"
2+
3+
int is_urlschemechar(int first_flag, int ch)
4+
{
5+
/*
6+
* The set of valid URL schemes, as per STD66 (RFC3986) is
7+
* '[A-Za-z][A-Za-z0-9+.-]*'. But use slightly looser check
8+
* of '[A-Za-z0-9][A-Za-z0-9+.-]*' because earlier version
9+
* of check used '[A-Za-z0-9]+' so not to break any remote
10+
* helpers.
11+
*/
12+
int alphanumeric, special;
13+
alphanumeric = ch > 0 && isalnum(ch);
14+
special = ch == '+' || ch == '-' || ch == '.';
15+
return alphanumeric || (!first_flag && special);
16+
}
17+
18+
int is_url(const char *url)
19+
{
20+
const char *url2, *first_slash;
21+
22+
if (!url)
23+
return 0;
24+
url2 = url;
25+
first_slash = strchr(url, '/');
26+
27+
/* Input with no slash at all or slash first can't be URL. */
28+
if (!first_slash || first_slash == url)
29+
return 0;
30+
/* Character before must be : and next must be /. */
31+
if (first_slash[-1] != ':' || first_slash[1] != '/')
32+
return 0;
33+
/* There must be something before the :// */
34+
if (first_slash == url + 1)
35+
return 0;
36+
/*
37+
* Check all characters up to first slash - 1. Only alphanum
38+
* is allowed.
39+
*/
40+
url2 = url;
41+
while (url2 < first_slash - 1) {
42+
if (!is_urlschemechar(url2 == url, (unsigned char)*url2))
43+
return 0;
44+
url2++;
45+
}
46+
47+
/* Valid enough. */
48+
return 1;
49+
}
50+
51+
static int url_decode_char(const char *q)
52+
{
53+
int i;
54+
unsigned char val = 0;
55+
for (i = 0; i < 2; i++) {
56+
unsigned char c = *q++;
57+
val <<= 4;
58+
if (c >= '0' && c <= '9')
59+
val += c - '0';
60+
else if (c >= 'a' && c <= 'f')
61+
val += c - 'a' + 10;
62+
else if (c >= 'A' && c <= 'F')
63+
val += c - 'A' + 10;
64+
else
65+
return -1;
66+
}
67+
return val;
68+
}
69+
70+
static char *url_decode_internal(const char **query, const char *stop_at)
71+
{
72+
const char *q = *query;
73+
struct strbuf out;
74+
75+
strbuf_init(&out, 16);
76+
do {
77+
unsigned char c = *q;
78+
79+
if (!c)
80+
break;
81+
if (stop_at && strchr(stop_at, c)) {
82+
q++;
83+
break;
84+
}
85+
86+
if (c == '%') {
87+
int val = url_decode_char(q + 1);
88+
if (0 <= val) {
89+
strbuf_addch(&out, val);
90+
q += 3;
91+
continue;
92+
}
93+
}
94+
95+
if (c == '+')
96+
strbuf_addch(&out, ' ');
97+
else
98+
strbuf_addch(&out, c);
99+
q++;
100+
} while (1);
101+
*query = q;
102+
return strbuf_detach(&out, NULL);
103+
}
104+
105+
char *url_decode(const char *url)
106+
{
107+
return url_decode_internal(&url, NULL);
108+
}
109+
110+
char *url_decode_parameter_name(const char **query)
111+
{
112+
return url_decode_internal(query, "&=");
113+
}
114+
115+
char *url_decode_parameter_value(const char **query)
116+
{
117+
return url_decode_internal(query, "&");
118+
}

url.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef URL_H
2+
#define URL_H
3+
4+
extern int is_url(const char *url);
5+
extern int is_urlschemechar(int first_flag, int ch);
6+
extern char *url_decode(const char *url);
7+
extern char *url_decode_parameter_name(const char **query);
8+
extern char *url_decode_parameter_value(const char **query);
9+
10+
#endif /* URL_H */

0 commit comments

Comments
 (0)