Skip to content

Commit 2d37c3e

Browse files
committed
lots of random tests and debugging to try and figure out whats going on
1 parent 60811a2 commit 2d37c3e

File tree

3 files changed

+180
-2
lines changed

3 files changed

+180
-2
lines changed

src/stac_auth_proxy/middleware/ProcessLinksMiddleware.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ def transform_json(self, data: dict[str, Any], request: Request) -> dict[str, An
4545
req_base_url = get_base_url(request)
4646
parsed_req_url = urlparse(req_base_url)
4747
parsed_upstream_url = urlparse(self.upstream_url)
48+
49+
# DEBUG: Log URL processing
50+
logger.info(f"DEBUG ProcessLinks: req_base_url = {req_base_url}")
51+
logger.info(f"DEBUG ProcessLinks: parsed_req_url.netloc = {parsed_req_url.netloc}")
52+
logger.info(f"DEBUG ProcessLinks: upstream_url = {self.upstream_url}")
53+
logger.info(f"DEBUG ProcessLinks: parsed_upstream_url.netloc = {parsed_upstream_url.netloc}")
4854

4955
for link in get_links(data):
5056
try:
@@ -95,6 +101,8 @@ def _update_link(
95101

96102
# Replace the upstream host with the client's host
97103
if parsed_link.netloc == upstream_url.netloc:
104+
logger.info(f"DEBUG _update_link: Original link href = {link['href']}")
105+
logger.info(f"DEBUG _update_link: Replacing netloc {parsed_link.netloc} with {request_url.netloc}")
98106
parsed_link = parsed_link._replace(netloc=request_url.netloc)._replace(
99107
scheme=request_url.scheme
100108
)
@@ -118,4 +126,6 @@ def _update_link(
118126
urlunparse(parsed_link),
119127
)
120128

121-
link["href"] = urlunparse(parsed_link)
129+
new_href = urlunparse(parsed_link)
130+
logger.info(f"DEBUG _update_link: Final transformed href = {new_href}")
131+
link["href"] = new_href

src/stac_auth_proxy/utils/requests.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,16 +189,27 @@ def get_base_url(request: Request) -> str:
189189
forwarded_proto = request.headers.get("X-Forwarded-Proto")
190190
forwarded_path = request.headers.get("X-Forwarded-Path")
191191

192+
# DEBUG: Log request details
193+
logger.info(f"DEBUG get_base_url: request.url = {request.url}")
194+
logger.info(f"DEBUG get_base_url: request.url.netloc = {request.url.netloc}")
195+
logger.info(f"DEBUG get_base_url: Host header = {request.headers.get('host')}")
196+
logger.info(f"DEBUG get_base_url: X-Forwarded-Host = {forwarded_host}")
197+
logger.info(f"DEBUG get_base_url: X-Forwarded-Proto = {forwarded_proto}")
198+
192199
if forwarded_host:
193200
# Use forwarded headers to reconstruct the original client URL
194201
scheme = forwarded_proto or request.url.scheme
195202
netloc = forwarded_host
196203
# Use forwarded path if available, otherwise use request base URL path
197204
path = forwarded_path or request.base_url.path
205+
logger.info(f"DEBUG get_base_url: Using forwarded headers - netloc = {netloc}")
198206
else:
199207
# Fall back to the request's base URL if no forwarded headers
200208
scheme = request.url.scheme
201209
netloc = request.url.netloc
202210
path = request.base_url.path
211+
logger.info(f"DEBUG get_base_url: Using request URL - netloc = {netloc}")
203212

204-
return f"{scheme}://{netloc}{path}"
213+
result = f"{scheme}://{netloc}{path}"
214+
logger.info(f"DEBUG get_base_url: Final result = {result}")
215+
return result

tests/test_process_links.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,3 +639,160 @@ def test_transform_with_forwarded_headers(headers, expected_base_url):
639639
# but not include the forwarded path in the response URLs
640640
assert transformed["links"][0]["href"] == f"{expected_base_url}/proxy/collections"
641641
assert transformed["links"][1]["href"] == f"{expected_base_url}/proxy"
642+
643+
644+
@pytest.mark.parametrize(
645+
"upstream_url,request_host,request_port,scheme,input_links,expected_links",
646+
[
647+
# Test case 1: Non-standard port (should work correctly)
648+
(
649+
"http://montandon-eoapi-stac:8080",
650+
"stac-test.whydidweevendothis.com",
651+
1234,
652+
"http",
653+
[
654+
{"rel": "self", "href": "http://montandon-eoapi-stac:8080/collections"},
655+
],
656+
[
657+
"http://stac-test.whydidweevendothis.com:1234/collections",
658+
],
659+
),
660+
# Test case 2: Standard port HTTP (80) - BUG: should NOT include upstream port
661+
(
662+
"http://montandon-eoapi-stac:8080",
663+
"stac-test.whydidweevendothis.com",
664+
80,
665+
"http",
666+
[
667+
{"rel": "self", "href": "http://montandon-eoapi-stac:8080/collections"},
668+
],
669+
[
670+
"http://stac-test.whydidweevendothis.com/collections", # Should NOT have :8080
671+
],
672+
),
673+
# Test case 3: Standard port HTTPS (443) - BUG: should NOT include upstream port
674+
(
675+
"http://montandon-eoapi-stac:8080",
676+
"stac-test.whydidweevendothis.com",
677+
443,
678+
"https",
679+
[
680+
{"rel": "self", "href": "http://montandon-eoapi-stac:8080/collections"},
681+
],
682+
[
683+
"https://stac-test.whydidweevendothis.com/collections", # Should NOT have :8080
684+
],
685+
),
686+
# Test case 4: Standard port but NO server info (simulate real-world missing port)
687+
(
688+
"http://montandon-eoapi-stac:8080",
689+
"stac-test.whydidweevendothis.com",
690+
None, # No port specified - simulate real-world scenario
691+
"https",
692+
[
693+
{"rel": "self", "href": "http://montandon-eoapi-stac:8080/collections"},
694+
],
695+
[
696+
"https://stac-test.whydidweevendothis.com/collections", # Should NOT have :8080
697+
],
698+
),
699+
],
700+
)
701+
def test_transform_upstream_links_standard_ports_bug(
702+
upstream_url, request_host, request_port, scheme, input_links, expected_links
703+
):
704+
"""Test that demonstrates the port handling bug with standard ports.
705+
706+
This test demonstrates the issue where standard ports (80, 443) cause
707+
the upstream port to appear in the final URLs incorrectly.
708+
"""
709+
middleware = ProcessLinksMiddleware(
710+
app=None, upstream_url=upstream_url, root_path=None
711+
)
712+
713+
# Simulate request coming through standard port (80 or 443)
714+
# For standard ports, browsers don't include port in Host header and ASGI scope
715+
if request_port is None:
716+
# Simulate real-world scenario where port is not specified anywhere
717+
host_header = request_host # No port in Host header
718+
server_info = (request_host, 443) # Server might default to 443 for HTTPS
719+
elif request_port in [80, 443]:
720+
host_header = request_host # No port in Host header for standard ports
721+
server_info = (request_host, request_port) # But server info might be available
722+
else:
723+
host_header = f"{request_host}:{request_port}" # Non-standard ports include port
724+
server_info = (request_host, request_port)
725+
726+
request_scope = {
727+
"type": "http",
728+
"scheme": scheme,
729+
"path": "/test",
730+
"headers": [
731+
(b"host", host_header.encode()),
732+
(b"content-type", b"application/json"),
733+
],
734+
"server": server_info,
735+
}
736+
737+
data = {"links": input_links}
738+
transformed = middleware.transform_json(data, Request(request_scope))
739+
740+
for i, expected in enumerate(expected_links):
741+
actual = transformed["links"][i]["href"]
742+
print(f"Test case: port {request_port}")
743+
print(f"Expected: {expected}")
744+
print(f"Actual: {actual}")
745+
assert actual == expected, f"Port {request_port} test failed: expected {expected}, got {actual}"
746+
747+
748+
def test_transform_upstream_links_no_host_port_edge_case():
749+
"""Test the specific edge case that causes the bug.
750+
751+
This test simulates what happens when:
752+
1. Browser accesses http://localhost (no port in Host header)
753+
2. Server is running on port 8000 internally
754+
3. Upstream URL has port 8080
755+
4. The bug: final URLs incorrectly show :8080 instead of no port
756+
"""
757+
middleware = ProcessLinksMiddleware(
758+
app=None,
759+
upstream_url="http://montandon-eoapi-stac:8080",
760+
root_path=None
761+
)
762+
763+
# Simulate the exact scenario that fails:
764+
# - Host header: just "localhost" (no port)
765+
# - Server port: 8000 (internal container port)
766+
# - Scheme: http
767+
# - Expected result: http://localhost/collections (no port)
768+
# - Bug: gets http://localhost:8080/collections (upstream port!)
769+
request_scope = {
770+
"type": "http",
771+
"scheme": "http",
772+
"path": "/collections",
773+
"query_string": b"",
774+
"headers": [
775+
(b"host", b"localhost"), # NO PORT in host header
776+
(b"content-type", b"application/json"),
777+
],
778+
"server": ("127.0.0.1", 8000), # Internal server port
779+
}
780+
781+
# Test with a link from upstream that has port 8080
782+
data = {
783+
"links": [
784+
{"rel": "self", "href": "http://montandon-eoapi-stac:8080/collections"},
785+
]
786+
}
787+
788+
# This should transform to http://localhost/collections (no port)
789+
transformed = middleware.transform_json(data, Request(request_scope))
790+
actual = transformed["links"][0]["href"]
791+
expected = "http://localhost/collections"
792+
793+
print(f"Edge case test:")
794+
print(f"Expected: {expected}")
795+
print(f"Actual: {actual}")
796+
797+
# This test should FAIL and demonstrate the bug
798+
assert actual == expected, f"Edge case failed: expected {expected}, got {actual}"

0 commit comments

Comments
 (0)