Skip to content

Commit bcacc0e

Browse files
committed
Merge branch 'jk/url-decode'
* jk/url-decode: decode file:// and ssh:// URLs; make url-related functions reusable
2 parents 1c5d6b2 + 9d2e942 commit bcacc0e

File tree

7 files changed

+153
-106
lines changed

7 files changed

+153
-106
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ LIB_OBJS += tree-diff.o
627627
LIB_OBJS += tree.o
628628
LIB_OBJS += tree-walk.o
629629
LIB_OBJS += unpack-trees.o
630+
LIB_OBJS += url.o
630631
LIB_OBJS += usage.o
631632
LIB_OBJS += userdiff.o
632633
LIB_OBJS += utf8.o

connect.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "refs.h"
66
#include "run-command.h"
77
#include "remote.h"
8+
#include "url.h"
89

910
static char *server_capabilities;
1011

@@ -450,7 +451,7 @@ static struct child_process no_fork;
450451
struct child_process *git_connect(int fd[2], const char *url_orig,
451452
const char *prog, int flags)
452453
{
453-
char *url = xstrdup(url_orig);
454+
char *url;
454455
char *host, *path;
455456
char *end;
456457
int c;
@@ -466,6 +467,11 @@ struct child_process *git_connect(int fd[2], const char *url_orig,
466467
*/
467468
signal(SIGCHLD, SIG_DFL);
468469

470+
if (is_url(url_orig))
471+
url = url_decode(url_orig);
472+
else
473+
url = xstrdup(url_orig);
474+
469475
host = strstr(url, "://");
470476
if (host) {
471477
*host = '\0';

http-backend.c

Lines changed: 3 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "exec_cmd.h"
77
#include "run-command.h"
88
#include "string-list.h"
9+
#include "url.h"
910

1011
static const char content_type[] = "Content-Type";
1112
static const char content_length[] = "Content-Length";
@@ -25,69 +26,15 @@ static struct rpc_service rpc_service[] = {
2526
{ "receive-pack", "receivepack", -1 },
2627
};
2728

28-
static int decode_char(const char *q)
29-
{
30-
int i;
31-
unsigned char val = 0;
32-
for (i = 0; i < 2; i++) {
33-
unsigned char c = *q++;
34-
val <<= 4;
35-
if (c >= '0' && c <= '9')
36-
val += c - '0';
37-
else if (c >= 'a' && c <= 'f')
38-
val += c - 'a' + 10;
39-
else if (c >= 'A' && c <= 'F')
40-
val += c - 'A' + 10;
41-
else
42-
return -1;
43-
}
44-
return val;
45-
}
46-
47-
static char *decode_parameter(const char **query, int is_name)
48-
{
49-
const char *q = *query;
50-
struct strbuf out;
51-
52-
strbuf_init(&out, 16);
53-
do {
54-
unsigned char c = *q;
55-
56-
if (!c)
57-
break;
58-
if (c == '&' || (is_name && c == '=')) {
59-
q++;
60-
break;
61-
}
62-
63-
if (c == '%') {
64-
int val = decode_char(q + 1);
65-
if (0 <= val) {
66-
strbuf_addch(&out, val);
67-
q += 3;
68-
continue;
69-
}
70-
}
71-
72-
if (c == '+')
73-
strbuf_addch(&out, ' ');
74-
else
75-
strbuf_addch(&out, c);
76-
q++;
77-
} while (1);
78-
*query = q;
79-
return strbuf_detach(&out, NULL);
80-
}
81-
8229
static struct string_list *get_parameters(void)
8330
{
8431
if (!query_params) {
8532
const char *query = getenv("QUERY_STRING");
8633

8734
query_params = xcalloc(1, sizeof(*query_params));
8835
while (query && *query) {
89-
char *name = decode_parameter(&query, 1);
90-
char *value = decode_parameter(&query, 0);
36+
char *name = url_decode_parameter_name(&query);
37+
char *value = url_decode_parameter_value(&query);
9138
struct string_list_item *i;
9239

9340
i = string_list_lookup(name, query_params);

t/t5601-clone.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,16 @@ test_expect_success 'clone respects global branch.autosetuprebase' '
176176
)
177177
'
178178

179+
test_expect_success 'respect url-encoding of file://' '
180+
git init x+y &&
181+
test_must_fail git clone "file://$PWD/x+y" xy-url &&
182+
git clone "file://$PWD/x%2By" xy-url
183+
'
184+
185+
test_expect_success 'do not respect url-encoding of non-url path' '
186+
git init x+y &&
187+
test_must_fail git clone x%2By xy-regular &&
188+
git clone x+y xy-regular
189+
'
190+
179191
test_done

transport.c

Lines changed: 2 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "dir.h"
1010
#include "refs.h"
1111
#include "branch.h"
12+
#include "url.h"
1213

1314
/* rsync support */
1415

@@ -871,54 +872,6 @@ static int is_file(const char *url)
871872
return S_ISREG(buf.st_mode);
872873
}
873874

874-
static int isurlschemechar(int first_flag, int ch)
875-
{
876-
/*
877-
* The set of valid URL schemes, as per STD66 (RFC3986) is
878-
* '[A-Za-z][A-Za-z0-9+.-]*'. But use slightly looser check
879-
* of '[A-Za-z0-9][A-Za-z0-9+.-]*' because earlier version
880-
* of check used '[A-Za-z0-9]+' so not to break any remote
881-
* helpers.
882-
*/
883-
int alphanumeric, special;
884-
alphanumeric = ch > 0 && isalnum(ch);
885-
special = ch == '+' || ch == '-' || ch == '.';
886-
return alphanumeric || (!first_flag && special);
887-
}
888-
889-
static int is_url(const char *url)
890-
{
891-
const char *url2, *first_slash;
892-
893-
if (!url)
894-
return 0;
895-
url2 = url;
896-
first_slash = strchr(url, '/');
897-
898-
/* Input with no slash at all or slash first can't be URL. */
899-
if (!first_slash || first_slash == url)
900-
return 0;
901-
/* Character before must be : and next must be /. */
902-
if (first_slash[-1] != ':' || first_slash[1] != '/')
903-
return 0;
904-
/* There must be something before the :// */
905-
if (first_slash == url + 1)
906-
return 0;
907-
/*
908-
* Check all characters up to first slash - 1. Only alphanum
909-
* is allowed.
910-
*/
911-
url2 = url;
912-
while (url2 < first_slash - 1) {
913-
if (!isurlschemechar(url2 == url, (unsigned char)*url2))
914-
return 0;
915-
url2++;
916-
}
917-
918-
/* Valid enough. */
919-
return 1;
920-
}
921-
922875
static int external_specification_len(const char *url)
923876
{
924877
return strchr(url, ':') - url;
@@ -946,7 +899,7 @@ struct transport *transport_get(struct remote *remote, const char *url)
946899
if (url) {
947900
const char *p = url;
948901

949-
while (isurlschemechar(p == url, *p))
902+
while (is_urlschemechar(p == url, *p))
950903
p++;
951904
if (!prefixcmp(p, "::"))
952905
helper = xstrndup(url, p - url);

url.c

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#include "cache.h"
2+
3+
int is_urlschemechar(int first_flag, int ch)
4+
{
5+
/*
6+
* The set of valid URL schemes, as per STD66 (RFC3986) is
7+
* '[A-Za-z][A-Za-z0-9+.-]*'. But use slightly looser check
8+
* of '[A-Za-z0-9][A-Za-z0-9+.-]*' because earlier version
9+
* of check used '[A-Za-z0-9]+' so not to break any remote
10+
* helpers.
11+
*/
12+
int alphanumeric, special;
13+
alphanumeric = ch > 0 && isalnum(ch);
14+
special = ch == '+' || ch == '-' || ch == '.';
15+
return alphanumeric || (!first_flag && special);
16+
}
17+
18+
int is_url(const char *url)
19+
{
20+
const char *url2, *first_slash;
21+
22+
if (!url)
23+
return 0;
24+
url2 = url;
25+
first_slash = strchr(url, '/');
26+
27+
/* Input with no slash at all or slash first can't be URL. */
28+
if (!first_slash || first_slash == url)
29+
return 0;
30+
/* Character before must be : and next must be /. */
31+
if (first_slash[-1] != ':' || first_slash[1] != '/')
32+
return 0;
33+
/* There must be something before the :// */
34+
if (first_slash == url + 1)
35+
return 0;
36+
/*
37+
* Check all characters up to first slash - 1. Only alphanum
38+
* (plus '+', '-' and '.' after the first character) is allowed.
39+
*/
40+
url2 = url;
41+
while (url2 < first_slash - 1) {
42+
if (!is_urlschemechar(url2 == url, (unsigned char)*url2))
43+
return 0;
44+
url2++;
45+
}
46+
47+
/* Valid enough. */
48+
return 1;
49+
}
50+
51+
static int url_decode_char(const char *q)
52+
{
53+
int i;
54+
unsigned char val = 0;
55+
for (i = 0; i < 2; i++) {
56+
unsigned char c = *q++;
57+
val <<= 4;
58+
if (c >= '0' && c <= '9')
59+
val += c - '0';
60+
else if (c >= 'a' && c <= 'f')
61+
val += c - 'a' + 10;
62+
else if (c >= 'A' && c <= 'F')
63+
val += c - 'A' + 10;
64+
else
65+
return -1;
66+
}
67+
return val;
68+
}
69+
70+
static char *url_decode_internal(const char **query, const char *stop_at)
71+
{
72+
const char *q = *query;
73+
struct strbuf out;
74+
75+
strbuf_init(&out, 16);
76+
do {
77+
unsigned char c = *q;
78+
79+
if (!c)
80+
break;
81+
if (stop_at && strchr(stop_at, c)) {
82+
q++;
83+
break;
84+
}
85+
86+
if (c == '%') {
87+
int val = url_decode_char(q + 1);
88+
if (0 <= val) {
89+
strbuf_addch(&out, val);
90+
q += 3;
91+
continue;
92+
}
93+
}
94+
95+
if (c == '+')
96+
strbuf_addch(&out, ' ');
97+
else
98+
strbuf_addch(&out, c);
99+
q++;
100+
} while (1);
101+
*query = q;
102+
return strbuf_detach(&out, NULL);
103+
}
104+
105+
char *url_decode(const char *url)
106+
{
107+
return url_decode_internal(&url, NULL);
108+
}
109+
110+
char *url_decode_parameter_name(const char **query)
111+
{
112+
return url_decode_internal(query, "&=");
113+
}
114+
115+
char *url_decode_parameter_value(const char **query)
116+
{
117+
return url_decode_internal(query, "&");
118+
}

url.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef URL_H
2+
#define URL_H
3+
4+
extern int is_url(const char *url);
5+
extern int is_urlschemechar(int first_flag, int ch);
6+
extern char *url_decode(const char *url);
7+
extern char *url_decode_parameter_name(const char **query);
8+
extern char *url_decode_parameter_value(const char **query);
9+
10+
#endif /* URL_H */

0 commit comments

Comments
 (0)