11#include < Python.h>
22#include < windows.h>
3+ #include < shlwapi.h>
34#include < winhttp.h>
45#include < netlistmgr.h>
56
@@ -108,7 +109,7 @@ static wchar_t **split_to_array(wchar_t *str, wchar_t sep) {
108109}
109110
110111
111- static int crack_url (wchar_t *url, URL_COMPONENTS *parts, int add_nuls ) {
112+ static int crack_url (const wchar_t *url, URL_COMPONENTS *parts) {
112113 parts->lpszScheme = NULL ;
113114 parts->lpszUserName = NULL ;
114115 parts->lpszPassword = NULL ;
@@ -125,29 +126,66 @@ static int crack_url(wchar_t *url, URL_COMPONENTS *parts, int add_nuls) {
125126 winhttp_error ();
126127 return 0 ;
127128 }
128- if (add_nuls) {
129- if (parts->lpszScheme && parts->dwSchemeLength > 0 ) {
130- parts->lpszScheme [parts->dwSchemeLength ] = L' \0 ' ;
131- }
132- if (parts->lpszUserName && parts->dwUserNameLength > 0 ) {
133- parts->lpszUserName [parts->dwUserNameLength ] = L' \0 ' ;
134- }
135- if (parts->lpszPassword && parts->dwPasswordLength > 0 ) {
136- parts->lpszPassword [parts->dwPasswordLength ] = L' \0 ' ;
137- }
138- if (parts->lpszHostName && parts->dwHostNameLength > 0 ) {
139- parts->lpszHostName [parts->dwHostNameLength ] = L' \0 ' ;
140- }
141- if (parts->lpszUrlPath && parts->dwUrlPathLength > 0 ) {
142- parts->lpszUrlPath [parts->dwUrlPathLength ] = L' \0 ' ;
129+ return 1 ;
130+ }
131+
132+ static wchar_t *_recode_url_part (bool encode, const wchar_t *url_part, DWORD cch, bool allow_env=false )
133+ {
134+ if (!url_part) {
135+ return NULL ;
136+ }
137+ // Need to copy the incoming string to ensure it's null terminated
138+ cch += 1 ;
139+ wchar_t *url_string = (wchar_t *)PyMem_Malloc (sizeof (wchar_t ) * cch);
140+ if (!url_string) {
141+ PyErr_NoMemory ();
142+ return NULL ;
143+ }
144+ wcsncpy_s (url_string, cch, url_part, cch - 1 );
145+ if (!url_string[0 ] || cch > 32767 ) {
146+ // Too long/empty for the API, just bail out
147+ return url_string;
148+ }
149+ if (allow_env && cch > 2 && url_string[0 ] == L' %' && url_string[cch - 2 ] == L' %' ) {
150+ // Looks like an environment variable, so we won't change it.
151+ return url_string;
152+ }
153+
154+ wchar_t *result = NULL ;
155+ HRESULT r = E_POINTER;
156+ for (int retries = 3 ; retries > 0 && r == E_POINTER; --retries) {
157+ result = (wchar_t *)PyMem_Realloc (result, sizeof (wchar_t ) * cch);
158+ if (!result) {
159+ PyMem_Free (url_string);
160+ PyErr_NoMemory ();
161+ return NULL ;
143162 }
144- if (parts->lpszExtraInfo && parts->dwExtraInfoLength > 0 ) {
145- parts->lpszExtraInfo [parts->dwExtraInfoLength ] = L' \0 ' ;
163+ if (encode) {
164+ // "SEGMENT_ONLY" means we want to escape the entire string
165+ r = UrlEscapeW (url_string, result, &cch, URL_ESCAPE_SEGMENT_ONLY | URL_ESCAPE_ASCII_URI_COMPONENT);
166+ } else {
167+ r = UrlUnescapeW (url_string, result, &cch, 0 );
146168 }
147169 }
148- return 1 ;
170+ PyMem_Free (url_string);
171+ if (r) {
172+ err_SetFromWindowsErrWithMessage ((DWORD)r);
173+ return NULL ;
174+ }
175+ return result;
176+ }
177+
178+ static wchar_t *encode_url_part (const wchar_t *url_part, DWORD cch, bool allow_env=false )
179+ {
180+ return _recode_url_part (true , url_part, cch, allow_env);
149181}
150182
183+ static wchar_t *decode_url_part (const wchar_t *url_part, DWORD cch, bool allow_env=false )
184+ {
185+ return _recode_url_part (false , url_part, cch, allow_env);
186+ }
187+
188+
151189extern " C" {
152190
153191#define CHECK_WINHTTP (x ) if (!x) { winhttp_error (); goto exit; }
@@ -226,7 +264,6 @@ static bool winhttp_apply_proxy(HINTERNET hSession, HINTERNET hRequest, const wc
226264PyObject *winhttp_urlopen (PyObject *, PyObject *args, PyObject *kwargs) {
227265 static const char * keywords[] = {" url" , " method" , " headers" , " accepts" , " chunksize" , " on_progress" , " on_cred_request" , NULL };
228266 wchar_t *url = NULL ;
229- wchar_t *url2 = NULL ; // a copy of url for splitting
230267 wchar_t *method = NULL ;
231268 wchar_t *headers = NULL ;
232269 wchar_t *accepts = NULL ;
@@ -246,7 +283,11 @@ PyObject *winhttp_urlopen(PyObject *, PyObject *args, PyObject *kwargs) {
246283 uint64_t content_length;
247284 PyObject *chunks = NULL ;
248285 uint64_t content_read = 0 ;
249- size_t n = 0 ;
286+
287+ wchar_t *hostname = NULL ;
288+ wchar_t *urlpath = NULL ;
289+ wchar_t *user = NULL ;
290+ wchar_t *pass = NULL ;
250291
251292 if (!PyArg_ParseTupleAndKeywords (args, kwargs, " O&O&O&O&|nOO:winhttp_urlopen" , keywords,
252293 as_utf16, &url, as_utf16, &method, as_utf16, &headers, as_utf16, &accepts, &chunksize, &on_progress, &on_cred_request)) {
@@ -264,17 +305,19 @@ PyObject *winhttp_urlopen(PyObject *, PyObject *args, PyObject *kwargs) {
264305 if (!accepts_array) {
265306 goto exit;
266307 }
267- n = wcslen (url) + 1 ;
268- url2 = (wchar_t *)PyMem_Malloc (n * sizeof (wchar_t ));
269- if (!url2) {
270- PyErr_NoMemory ();
308+ if (!crack_url (url, &url_parts)) {
309+ goto exit;
310+ }
311+ hostname = decode_url_part (url_parts.lpszHostName , url_parts.dwHostNameLength );
312+ if (!hostname) {
271313 goto exit;
272314 }
273- wcscpy_s (url2, n, url );
274- if (!crack_url (url2, &url_parts, 1 ) ) {
315+ urlpath = decode_url_part (url_parts. lpszUrlPath , url_parts. dwUrlPathLength );
316+ if (!urlpath ) {
275317 goto exit;
276318 }
277319
320+
278321 hSession = WinHttpOpen (
279322 NULL ,
280323 WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
@@ -310,19 +353,16 @@ PyObject *winhttp_urlopen(PyObject *, PyObject *args, PyObject *kwargs) {
310353
311354 hConnection = WinHttpConnect (
312355 hSession,
313- url_parts. lpszHostName ,
356+ hostname ,
314357 url_parts.nPort ,
315358 0
316359 );
317360 CHECK_WINHTTP (hConnection);
318361
319- if (url_parts.dwUrlPathLength && !url_parts.lpszUrlPath [0 ]) {
320- url_parts.lpszUrlPath [0 ] = L' /' ;
321- }
322362 hRequest = WinHttpOpenRequest (
323363 hConnection,
324364 method,
325- url_parts. lpszUrlPath ,
365+ urlpath && urlpath[ 0 ] ? urlpath : L" / " ,
326366 NULL ,
327367 WINHTTP_NO_REFERER,
328368 accepts_array,
@@ -341,12 +381,20 @@ PyObject *winhttp_urlopen(PyObject *, PyObject *args, PyObject *kwargs) {
341381 ));
342382
343383 if (url_parts.dwUserNameLength || url_parts.dwPasswordLength ) {
384+ user = decode_url_part (url_parts.lpszUserName , url_parts.dwUserNameLength );
385+ if (!user) {
386+ goto exit;
387+ }
388+ pass = decode_url_part (url_parts.lpszPassword , url_parts.dwPasswordLength );
389+ if (!pass) {
390+ goto exit;
391+ }
344392 CHECK_WINHTTP (WinHttpSetCredentials (
345393 hRequest,
346394 WINHTTP_AUTH_TARGET_SERVER,
347395 WINHTTP_AUTH_SCHEME_BASIC,
348- url_parts. lpszUserName ,
349- url_parts. lpszPassword ,
396+ user ,
397+ pass ,
350398 NULL
351399 ));
352400 }
@@ -464,11 +512,14 @@ PyObject *winhttp_urlopen(PyObject *, PyObject *args, PyObject *kwargs) {
464512 if (hSession) {
465513 WinHttpCloseHandle (hSession);
466514 }
515+ PyMem_Free (user);
516+ PyMem_Free (pass);
517+ PyMem_Free (hostname);
518+ PyMem_Free (urlpath);
467519 PyMem_Free (accepts_array);
468520 PyMem_Free (accepts);
469521 PyMem_Free (headers);
470522 PyMem_Free (method);
471- PyMem_Free (url2);
472523 PyMem_Free (url);
473524 return result;
474525}
@@ -510,19 +561,26 @@ PyObject *winhttp_urlsplit(PyObject *, PyObject *args, PyObject *kwargs) {
510561 return NULL ;
511562 }
512563 URL_COMPONENTS url_parts = { sizeof (URL_COMPONENTS) };
513- if (!crack_url (url, &url_parts, 0 )) {
564+ if (!crack_url (url, &url_parts)) {
514565 PyMem_Free (url);
515566 return NULL ;
516567 }
568+ // Deliberately not decoding host or path. We never use a blacklist, we only
569+ // match against values specified by the user, or pass it to. If they want
570+ // to provide the same URL with different encoding, that's their fault.
571+ wchar_t *user = decode_url_part (url_parts.lpszUserName , url_parts.dwUserNameLength , true );
572+ wchar_t *pass = decode_url_part (url_parts.lpszPassword , url_parts.dwPasswordLength , true );
517573 PyObject *r = Py_BuildValue (" (u#u#u#u#nu#u#)" ,
518574 url_parts.lpszScheme , (Py_ssize_t)url_parts.dwSchemeLength ,
519- url_parts. lpszUserName , (Py_ssize_t)url_parts. dwUserNameLength ,
520- url_parts. lpszPassword , (Py_ssize_t)url_parts. dwPasswordLength ,
575+ user , (Py_ssize_t)(user ? wcslen (user) : 0 ) ,
576+ pass , (Py_ssize_t)(pass ? wcslen (pass) : 0 ) ,
521577 url_parts.lpszHostName , (Py_ssize_t)url_parts.dwHostNameLength ,
522578 (Py_ssize_t)url_parts.nPort ,
523579 url_parts.lpszUrlPath , (Py_ssize_t)url_parts.dwUrlPathLength ,
524580 url_parts.lpszExtraInfo , (Py_ssize_t)url_parts.dwExtraInfoLength
525581 );
582+ PyMem_Free (user);
583+ PyMem_Free (pass);
526584 PyMem_Free (url);
527585 return r;
528586}
@@ -532,10 +590,12 @@ PyObject *winhttp_urlunsplit(PyObject *, PyObject *args, PyObject *kwargs) {
532590 static const char * keywords[] = {" scheme" , " user" , " password" , " netloc" , " port" , " path" , " extra" , NULL };
533591 URL_COMPONENTS url = { sizeof (URL_COMPONENTS) };
534592 Py_ssize_t port = 0 ;
593+ wchar_t *user = NULL ;
594+ wchar_t *pass = NULL ;
535595 if (!PyArg_ParseTupleAndKeywords (args, kwargs, " O&O&O&O&nO&O&:winhttp_urlunsplit" , keywords,
536596 as_utf16, &url.lpszScheme ,
537- as_utf16, &url. lpszUserName ,
538- as_utf16, &url. lpszPassword ,
597+ as_utf16, &user ,
598+ as_utf16, &pass ,
539599 as_utf16, &url.lpszHostName ,
540600 &port,
541601 as_utf16, &url.lpszUrlPath ,
@@ -545,8 +605,16 @@ PyObject *winhttp_urlunsplit(PyObject *, PyObject *args, PyObject *kwargs) {
545605 }
546606 DWORD cch = 0 ;
547607 PyObject *r = NULL ;
608+ url.lpszUserName = encode_url_part (user, user ? wcslen (user) : 0 , true );
609+ if (user && !url.lpszUserName ) {
610+ goto exit;
611+ }
612+ url.lpszPassword = encode_url_part (pass, pass ? wcslen (pass) : 0 , true );
613+ if (pass && !url.lpszPassword ) {
614+ goto exit;
615+ }
548616 url.nPort = (INTERNET_PORT)port;
549- if (WinHttpCreateUrl (&url, ICU_ESCAPE , NULL , &cch)) {
617+ if (WinHttpCreateUrl (&url, 0 , NULL , &cch)) {
550618 // Success path, because it should've failed with ERROR_INSUFFICIENT_BUFFER
551619 PyErr_SetString (PyExc_ValueError, " unable to unsplit URL" );
552620 } else if (GetLastError () != ERROR_INSUFFICIENT_BUFFER) {
@@ -556,14 +624,17 @@ PyObject *winhttp_urlunsplit(PyObject *, PyObject *args, PyObject *kwargs) {
556624 wchar_t *buf = (wchar_t *)PyMem_Malloc (cch * sizeof (wchar_t ));
557625 if (!buf) {
558626 PyErr_NoMemory ();
559- } else if (!WinHttpCreateUrl (&url, ICU_ESCAPE , buf, &cch)) {
627+ } else if (!WinHttpCreateUrl (&url, 0 , buf, &cch)) {
560628 winhttp_error ();
561629 PyMem_Free (buf);
562630 } else {
563631 r = PyUnicode_FromWideChar (buf, cch);
564632 PyMem_Free (buf);
565633 }
566634 }
635+ exit:
636+ PyMem_Free (user);
637+ PyMem_Free (pass);
567638 PyMem_Free (url.lpszScheme );
568639 PyMem_Free (url.lpszUserName );
569640 PyMem_Free (url.lpszPassword );
0 commit comments