Skip to content

Commit 9c87891

Browse files
authored
[BUGFIX] response text returns junk (#30)
* fix: critical security vulnerabilities and improve proxy test coverage This commit addresses multiple security issues discovered during edge case analysis and improves test infrastructure for better reliability. Security Fixes: - Fix HTTP/1.1 body reallocation bug causing data loss (#1) * Modified realloc_body_buffer() to use current_data_size parameter * Fixes issue where response->body_len was 0 during receive * Prevents data loss when buffer needs to grow during receive - Add integer overflow protection in 8 critical locations (#7, #8) * HTTP/2 data callback buffer doubling (http2_logic.c:140) * HTTP/1.1 body buffer reallocation (http1.c:417, 549, 606) * Gzip decompression buffer expansion (compression.c:55) * Response header array growth (response.c:123) * Request header array growth (request.c:112) * Async request array growth (async_request_manager.c:171) * All checks use SIZE_MAX/2 to prevent integer overflow - Fix memory leak in DNS cache deep copy (#13) * Added proper cleanup on allocation failures in addrinfo_deep_copy() * Prevents memory leaks when malloc/strdup fails mid-operation Async HTTP Proxy Improvements: - Fix async HTTP proxy to use absolute URI for proxy requests - Add Proxy-Authorization header support for authenticated HTTP proxies - Properly distinguish between HTTP (uses absolute URI) and HTTPS (uses path) Test Infrastructure: - Add comprehensive edge case security tests (25 test cases) * Integer overflow protection tests * Memory leak prevention tests * Thread safety tests * Boundary condition tests - Add buffer reallocation regression tests (11 test cases) * Large response handling * Gzip decompression * Chunked transfer encoding * Multiple buffer doubling scenarios - Update proxy tests to use httpmorph-bin.bytetunnels.com * Added fixtures for both HTTP and HTTPS testing * HTTPS uses verify=False for self-signed certificates * Improved test reliability by using dedicated test server Results: All 371 tests pass with 14 expected skips * chore: more test cases * [FIX] Make dotenv import optional in test files for CI compatibility Fix ModuleNotFoundError in CI environments where python-dotenv is not installed. Changes: - Wrap dotenv import in try/except block in test_buffer_reallocation.py - Wrap dotenv import in try/except block in test_edge_cases_security.py - Follow same pattern as conftest.py for optional dependency handling Impact: - Tests now work in CI without requiring python-dotenv installation - Local development still benefits from .env file loading when dotenv is available - Environment variables can be set directly in CI/CD pipelines Fixes CI failures across all workflows with: ModuleNotFoundError: No module named 'dotenv' * [FIX] Pass TEST_HTTPBIN_HOST secret to CI test workflows Add TEST_HTTPBIN_HOST environment variable to CI workflows to fix test failures. Changes: - Add TEST_HTTPBIN_HOST to workflow secrets in _test.yml - Pass TEST_HTTPBIN_HOST to test environment in _test.yml - Pass TEST_HTTPBIN_HOST from ci.yml to _test.yml workflow Impact: - Edge case security tests can now access httpmorph-bin test server in CI - Buffer reallocation tests can run in CI environment - Fixes collection errors: "TEST_HTTPBIN_HOST environment variable is not set" Related: - Works together with previous commit making dotenv import optional - TEST_HTTPBIN_HOST must be configured as repository secret in GitHub
1 parent 60b50c4 commit 9c87891

File tree

15 files changed

+930
-134
lines changed

15 files changed

+930
-134
lines changed

.github/workflows/_test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ on:
1818
secrets:
1919
TEST_PROXY_URL:
2020
required: false
21+
TEST_HTTPBIN_HOST:
22+
required: false
2123
CODECOV_TOKEN:
2224
required: false
2325

@@ -234,6 +236,7 @@ jobs:
234236
pytest tests/ -v --cov=httpmorph --cov-report=xml -m "not proxy"
235237
env:
236238
TEST_PROXY_URL: ${{ secrets.TEST_PROXY_URL }}
239+
TEST_HTTPBIN_HOST: ${{ secrets.TEST_HTTPBIN_HOST }}
237240

238241
- name: Upload coverage
239242
if: matrix.os == inputs.primary-os && matrix.python-version == inputs.primary-python

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ jobs:
2828
primary-python: ${{ needs.load-config.outputs.primary-python }}
2929
secrets:
3030
TEST_PROXY_URL: ${{ secrets.TEST_PROXY_URL }}
31+
TEST_HTTPBIN_HOST: ${{ secrets.TEST_HTTPBIN_HOST }}
3132
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
3233

3334
# ============================================================

src/bindings/_httpmorph.pyx

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
1111
from libc.stdlib cimport malloc, free
1212
from libc.string cimport strdup
1313
from libc.stdio cimport printf
14+
from cpython.bytes cimport PyBytes_FromStringAndSize
1415

1516
# Helper to get version for User-Agent strings
1617
def _get_httpmorph_version():
@@ -74,14 +75,18 @@ cdef extern from "../include/httpmorph.h":
7475
char *key
7576
char *value
7677

77-
# Response structure (match the header exactly)
78+
# Response structure (match the C header EXACTLY - field order matters!)
7879
struct httpmorph_response:
7980
uint16_t status_code
8081
httpmorph_version_t http_version
8182
httpmorph_header_t *headers
8283
size_t header_count
84+
size_t header_capacity
8385
uint8_t *body
8486
size_t body_len
87+
size_t body_capacity
88+
void *_buffer_pool
89+
size_t _body_actual_size
8590
uint64_t connect_time_us
8691
uint64_t tls_time_us
8792
uint64_t first_byte_time_us
@@ -215,6 +220,7 @@ cdef class Client:
215220
if req is NULL:
216221
raise MemoryError("Failed to create request")
217222

223+
cdef bytes body_bytes = b''
218224
try:
219225
# Set timeout if provided (default is 30 seconds in C code)
220226
timeout = kwargs.get('timeout')
@@ -312,11 +318,16 @@ cdef class Client:
312318
if resp is NULL:
313319
raise RuntimeError("Failed to execute request")
314320

321+
# Copy body BEFORE destroying response (important for memory safety)
322+
body_bytes = b''
323+
if resp.body and resp.body_len > 0:
324+
body_bytes = PyBytes_FromStringAndSize(<char*>resp.body, <Py_ssize_t>resp.body_len)
325+
315326
# Convert response to Python dict
316327
result = {
317328
'status_code': resp.status_code,
318329
'headers': {},
319-
'body': bytes(resp.body[:resp.body_len]) if resp.body else b'',
330+
'body': body_bytes,
320331
'http_version': resp.http_version,
321332
'connect_time_us': resp.connect_time_us,
322333
'tls_time_us': resp.tls_time_us,
@@ -472,6 +483,7 @@ cdef class Session:
472483
if req is NULL:
473484
raise MemoryError("Failed to create request")
474485

486+
cdef bytes body_bytes = b''
475487
try:
476488
# Set timeout if provided (default is 30 seconds in C code)
477489
timeout = kwargs.get('timeout')
@@ -621,11 +633,16 @@ cdef class Session:
621633
if resp is NULL:
622634
raise RuntimeError("Failed to execute request")
623635

636+
# Copy body BEFORE destroying response (important for memory safety)
637+
body_bytes = b''
638+
if resp.body and resp.body_len > 0:
639+
body_bytes = PyBytes_FromStringAndSize(<char*>resp.body, <Py_ssize_t>resp.body_len)
640+
624641
# Convert response to Python dict
625642
result = {
626643
'status_code': resp.status_code,
627644
'headers': {},
628-
'body': bytes(resp.body[:resp.body_len]) if resp.body else b'',
645+
'body': body_bytes,
629646
'http_version': resp.http_version,
630647
'connect_time_us': resp.connect_time_us,
631648
'tls_time_us': resp.tls_time_us,

src/core/async_request.c

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "async_request.h"
1212
#include "io_engine.h"
1313
#include "internal/proxy.h"
14+
#include "internal/util.h"
1415
#include <stdio.h>
1516
#include <stdlib.h>
1617
#include <string.h>
@@ -1009,20 +1010,30 @@ static int build_http_request(async_request_t *req) {
10091010
default: method_str = "GET"; break;
10101011
}
10111012

1012-
/* Extract path from URL (simple extraction) */
1013-
const char *path = strchr(request->url, '/');
1014-
if (path && path[0] == '/' && path[1] == '/') {
1015-
/* Skip scheme (http:// or https://) */
1016-
path = strchr(path + 2, '/');
1017-
}
1018-
if (!path || path[0] != '/') {
1019-
path = "/";
1013+
/* For HTTP through proxy, use absolute URI. For HTTPS or direct, use path only. */
1014+
const char *request_target;
1015+
bool use_absolute_uri = (req->proxy_host && !req->is_https);
1016+
1017+
if (use_absolute_uri) {
1018+
/* HTTP through proxy: use full URL as request target */
1019+
request_target = request->url;
1020+
} else {
1021+
/* HTTPS through proxy or direct connection: extract path */
1022+
const char *path = strchr(request->url, '/');
1023+
if (path && path[0] == '/' && path[1] == '/') {
1024+
/* Skip scheme (http:// or https://) */
1025+
path = strchr(path + 2, '/');
1026+
}
1027+
if (!path || path[0] != '/') {
1028+
path = "/";
1029+
}
1030+
request_target = path;
10201031
}
10211032

1022-
/* Build request line: METHOD path HTTP/1.1\r\n */
1033+
/* Build request line: METHOD request-target HTTP/1.1\r\n */
10231034
char *buf = (char *)req->send_buf;
10241035
int written = snprintf(buf, SEND_BUFFER_SIZE,
1025-
"%s %s HTTP/1.1\r\n", method_str, path);
1036+
"%s %s HTTP/1.1\r\n", method_str, request_target);
10261037
if (written < 0 || written >= (int)SEND_BUFFER_SIZE) {
10271038
return -1;
10281039
}
@@ -1034,6 +1045,25 @@ static int build_http_request(async_request_t *req) {
10341045
return -1;
10351046
}
10361047

1048+
/* Add Proxy-Authorization header for HTTP proxy (not HTTPS/CONNECT) */
1049+
if (use_absolute_uri && (req->proxy_username || req->proxy_password)) {
1050+
const char *username = req->proxy_username ? req->proxy_username : "";
1051+
const char *password = req->proxy_password ? req->proxy_password : "";
1052+
1053+
char credentials[512];
1054+
snprintf(credentials, sizeof(credentials), "%s:%s", username, password);
1055+
1056+
char *encoded = httpmorph_base64_encode(credentials, strlen(credentials));
1057+
if (encoded) {
1058+
written += snprintf(buf + written, SEND_BUFFER_SIZE - written,
1059+
"Proxy-Authorization: Basic %s\r\n", encoded);
1060+
free(encoded);
1061+
if (written >= (int)SEND_BUFFER_SIZE) {
1062+
return -1;
1063+
}
1064+
}
1065+
}
1066+
10371067
/* Add custom headers */
10381068
for (size_t i = 0; i < request->header_count; i++) {
10391069
written += snprintf(buf + written, SEND_BUFFER_SIZE - written,

src/core/async_request_manager.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ void async_manager_destroy(async_request_manager_t *mgr) {
168168
* Grow request array if needed
169169
*/
170170
static int grow_request_array(async_request_manager_t *mgr) {
171+
/* Check for integer overflow before doubling */
172+
if (mgr->request_capacity > SIZE_MAX / 2 / sizeof(async_request_t*)) {
173+
/* Would overflow - reject new request */
174+
return -1;
175+
}
176+
171177
size_t new_capacity = mgr->request_capacity * 2;
172178
async_request_t **new_array = realloc(mgr->requests,
173179
new_capacity * sizeof(async_request_t*));

src/core/compression.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ static int decompress_zlib(httpmorph_response_t *response, int windowBits) {
5252

5353
/* Handle need for more output space */
5454
while (ret == Z_BUF_ERROR || ret == Z_OK) {
55+
/* Check for integer overflow before doubling */
56+
if (decompressed_capacity > SIZE_MAX / 2) {
57+
/* Would overflow - fail decompression */
58+
inflateEnd(&stream);
59+
if (response->_buffer_pool) {
60+
buffer_pool_put((httpmorph_buffer_pool_t*)response->_buffer_pool,
61+
decompressed, decompressed_actual_size);
62+
} else {
63+
free(decompressed);
64+
}
65+
return -1;
66+
}
67+
5568
size_t new_capacity = decompressed_capacity * 2;
5669
size_t new_actual_size = 0;
5770
uint8_t *new_decompressed = NULL;

src/core/core.c

Lines changed: 4 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -73,59 +73,10 @@ httpmorph_response_t* httpmorph_request_execute(
7373
bool proxy_use_tls = false;
7474
SSL *proxy_ssl = NULL;
7575

76-
/* Try to reuse pooled proxy connection to same target */
77-
if (pool) {
78-
pooled_conn = pool_get_connection(pool, host, port);
79-
if (pooled_conn && pooled_conn->is_proxy) {
80-
/* Check if proxy URL matches */
81-
if (pooled_conn->proxy_url && strcmp(pooled_conn->proxy_url, request->proxy_url) == 0) {
82-
/* Reuse existing proxy connection */
83-
sockfd = pooled_conn->sockfd;
84-
ssl = pooled_conn->ssl;
85-
use_http2 = pooled_conn->is_http2;
86-
87-
/* Don't reuse HTTP/2 connections - HTTP/2 pooling has reliability issues */
88-
if (use_http2) {
89-
pool_connection_destroy(pooled_conn);
90-
pooled_conn = NULL;
91-
sockfd = -1;
92-
ssl = NULL;
93-
use_http2 = false;
94-
}
95-
96-
/* For SSL connections, verify still valid before reuse */
97-
if (ssl && sockfd >= 0) {
98-
int shutdown_state = SSL_get_shutdown(ssl);
99-
if (shutdown_state != 0) {
100-
/* SSL was shut down - destroy and recreate */
101-
pool_connection_destroy(pooled_conn);
102-
pooled_conn = NULL;
103-
sockfd = -1;
104-
ssl = NULL;
105-
}
106-
}
107-
108-
if (sockfd >= 0) {
109-
/* Connection reused - no connect/TLS time */
110-
connect_time = 0;
111-
response->tls_time_us = 0;
112-
113-
/* Restore TLS info from pooled connection */
114-
if (pooled_conn->ja3_fingerprint) {
115-
response->ja3_fingerprint = strdup(pooled_conn->ja3_fingerprint);
116-
}
117-
}
118-
} else {
119-
/* Different proxy URL - can't reuse, destroy it */
120-
pool_connection_destroy(pooled_conn);
121-
pooled_conn = NULL;
122-
}
123-
} else if (pooled_conn) {
124-
/* Got a direct connection when we need proxy - can't reuse */
125-
pool_connection_destroy(pooled_conn);
126-
pooled_conn = NULL;
127-
}
128-
}
76+
/* Don't use connection pool for proxy connections - they are not pooled
77+
* due to SSL_CTX use-after-free issues (see line 532 below).
78+
* If we retrieve a connection from pool here, it would be a stale direct
79+
* connection which cannot be used for proxy requests. */
12980

13081
/* If no pooled connection, create new proxy connection */
13182
if (sockfd < 0) {

src/core/http1.c

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,12 @@
2323
*
2424
* Returns new buffer on success, NULL on failure.
2525
* If successful, updates response->body, response->body_capacity, and response->_body_actual_size
26+
*
27+
* @param response The response structure
28+
* @param new_capacity The new capacity needed
29+
* @param current_data_size The actual amount of data currently in the buffer (NOT response->body_len)
2630
*/
27-
static uint8_t* realloc_body_buffer(httpmorph_response_t *response, size_t new_capacity) {
31+
static uint8_t* realloc_body_buffer(httpmorph_response_t *response, size_t new_capacity, size_t current_data_size) {
2832
uint8_t *new_body = NULL;
2933
size_t new_actual_size = 0;
3034

@@ -41,9 +45,9 @@ static uint8_t* realloc_body_buffer(httpmorph_response_t *response, size_t new_c
4145
return NULL;
4246
}
4347

44-
/* Copy existing data to new buffer */
45-
if (response->body && response->body_len > 0) {
46-
memcpy(new_body, response->body, response->body_len);
48+
/* Copy existing data to new buffer - use current_data_size, NOT response->body_len! */
49+
if (response->body && current_data_size > 0) {
50+
memcpy(new_body, response->body, current_data_size);
4751
}
4852

4953
/* Return old buffer to pool if available */
@@ -413,10 +417,16 @@ int httpmorph_recv_http_response(SSL *ssl, int sockfd, httpmorph_response_t *res
413417
if (body_in_buffer > 0) {
414418
if (response->body_capacity < body_in_buffer) {
415419
/* Use 2x growth strategy instead of exact size */
416-
size_t new_capacity = body_in_buffer * 2;
417-
if (!realloc_body_buffer(response, new_capacity)) {
418-
/* Allocation failed, but continue with existing buffer */
420+
/* Check for integer overflow before doubling */
421+
if (body_in_buffer > SIZE_MAX / 2) {
422+
/* Would overflow - use what we have */
419423
body_in_buffer = response->body_capacity;
424+
} else {
425+
size_t new_capacity = body_in_buffer * 2;
426+
if (!realloc_body_buffer(response, new_capacity, 0)) {
427+
/* Allocation failed, but continue with existing buffer */
428+
body_in_buffer = response->body_capacity;
429+
}
420430
}
421431
}
422432
memcpy(response->body, body_start, body_in_buffer);
@@ -436,7 +446,7 @@ int httpmorph_recv_http_response(SSL *ssl, int sockfd, httpmorph_response_t *res
436446
if (content_length > 0 && content_length < 100 * 1024 * 1024) {
437447
/* Known content length - pre-allocate exact size */
438448
if (response->body_capacity < content_length) {
439-
if (!realloc_body_buffer(response, content_length)) {
449+
if (!realloc_body_buffer(response, content_length, body_received)) {
440450
/* Allocation failed - will read what fits */
441451
}
442452
}
@@ -536,8 +546,15 @@ int httpmorph_recv_http_response(SSL *ssl, int sockfd, httpmorph_response_t *res
536546

537547
/* Ensure response body has enough space BEFORE copying */
538548
if (body_received + data_to_copy > response->body_capacity) {
539-
size_t new_capacity = (body_received + data_to_copy) * 2;
540-
if (!realloc_body_buffer(response, new_capacity)) {
549+
/* Check for integer overflow before doubling */
550+
size_t sum = body_received + data_to_copy;
551+
if (sum > SIZE_MAX / 2) {
552+
/* Would overflow - fail gracefully */
553+
last_chunk = true;
554+
break;
555+
}
556+
size_t new_capacity = sum * 2;
557+
if (!realloc_body_buffer(response, new_capacity, body_received)) {
541558
last_chunk = true;
542559
break;
543560
}
@@ -586,8 +603,13 @@ int httpmorph_recv_http_response(SSL *ssl, int sockfd, httpmorph_response_t *res
586603

587604
/* Expand buffer if needed */
588605
if (body_received > response->body_capacity - 2048) {
606+
/* Check for integer overflow before doubling */
607+
if (response->body_capacity > SIZE_MAX / 2) {
608+
/* Would overflow - stop reading */
609+
break;
610+
}
589611
size_t new_capacity = response->body_capacity * 2;
590-
if (!realloc_body_buffer(response, new_capacity)) break;
612+
if (!realloc_body_buffer(response, new_capacity, body_received)) break;
591613
}
592614
}
593615
}

src/core/http2_logic.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,16 @@ static int http2_on_data_chunk_recv_callback(nghttp2_session *session, uint8_t f
137137

138138
/* Expand buffer if needed */
139139
if (stream_data->data_len + len > stream_data->data_capacity) {
140-
size_t new_capacity = (stream_data->data_len + len) * 2;
140+
/* Calculate sum first to check for overflow */
141+
size_t sum = stream_data->data_len + len;
142+
143+
/* Check for integer overflow before doubling */
144+
if (sum > SIZE_MAX / 2) {
145+
/* Would overflow - either use SIZE_MAX or fail */
146+
return NGHTTP2_ERR_CALLBACK_FAILURE;
147+
}
148+
149+
size_t new_capacity = sum * 2;
141150
uint8_t *new_buf = realloc(stream_data->data_buf, new_capacity);
142151
if (!new_buf) return NGHTTP2_ERR_CALLBACK_FAILURE;
143152
stream_data->data_buf = new_buf;

0 commit comments

Comments
 (0)