2 changes: 1 addition & 1 deletion src/sentry/api/urls.py
@@ -2612,7 +2612,7 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
name="sentry-api-0-organization-conduit-demo",
),
re_path(
r"^(?P<organization_id_or_slug>[^/]+)/objectstore/$",
r"^(?P<organization_id_or_slug>[^/]+)/objectstore/(?P<path>.*)$",
OrganizationObjectstoreEndpoint.as_view(),
name="sentry-api-0-organization-objectstore",
),
1 change: 1 addition & 0 deletions src/sentry/hybridcloud/apigateway/proxy.py
@@ -47,6 +47,7 @@
"sentry-api-0-installable-preprod-artifact-download": 90.0,
"sentry-api-0-project-preprod-artifact-download": 90.0,
"sentry-api-0-project-preprod-artifact-size-analysis-download": 90.0,
"sentry-api-0-organization-objectstore": 90.0,
}

# stream 0.5 MB at a time
157 changes: 147 additions & 10 deletions src/sentry/objectstore/endpoints/organization.py
@@ -1,37 +1,174 @@
from collections.abc import Callable, Generator
from typing import Literal
from urllib.parse import urljoin
from wsgiref.util import is_hop_by_hop

import requests
from django.http import StreamingHttpResponse
from requests import Response as ExternalResponse
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import features
from sentry import features, options
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.bases import OrganizationEndpoint
from sentry.models.organization import Organization

CHUNK_SIZE = 512 * 1024


@region_silo_endpoint
class OrganizationObjectstoreEndpoint(OrganizationEndpoint):
publish_status = {
"GET": ApiPublishStatus.EXPERIMENTAL,
"PUT": ApiPublishStatus.EXPERIMENTAL,
"POST": ApiPublishStatus.EXPERIMENTAL,
"DELETE": ApiPublishStatus.EXPERIMENTAL,
}
owner = ApiOwner.FOUNDATIONAL_STORAGE
parser_classes = [] # accept arbitrary data and don't attempt to parse it

def get(self, request: Request, organization: Organization) -> Response:
def get(
self, request: Request, organization: Organization, path: str
) -> Response | StreamingHttpResponse:
if not features.has("organizations:objectstore-endpoint", organization, actor=request.user):
return Response(status=404)
# TODO: implement
return Response(status=200)
return self._proxy("GET", path, request)

def put(self, request: Request, organization: Organization) -> Response:
def put(
self, request: Request, organization: Organization, path: str
) -> Response | StreamingHttpResponse:
if not features.has("organizations:objectstore-endpoint", organization, actor=request.user):
return Response(status=404)
# TODO: implement
return Response(status=200)
return self._proxy("PUT", path, request)

def delete(self, request: Request, organization: Organization) -> Response:
def post(
self, request: Request, organization: Organization, path: str
) -> Response | StreamingHttpResponse:
if not features.has("organizations:objectstore-endpoint", organization, actor=request.user):
return Response(status=404)
# TODO: implement
return Response(status=200)
return self._proxy("POST", path, request)

def delete(
self, request: Request, organization: Organization, path: str
) -> Response | StreamingHttpResponse:
if not features.has("organizations:objectstore-endpoint", organization, actor=request.user):
return Response(status=404)
return self._proxy("DELETE", path, request)

def _proxy(
self,
method: Literal["GET", "PUT", "POST", "DELETE"],
path: str,
request: Request,
) -> Response | StreamingHttpResponse:
target_base_url = options.get("objectstore.config")["base_url"].rstrip("/")
target_url = urljoin(target_base_url, path)

The urljoin function may not behave as expected when joining paths. According to Python's documentation, urljoin('http://localhost:8888/', 'health') will correctly return http://localhost:8888/health, but urljoin('http://localhost:8888/', '/health') would replace the path entirely. If a path starts with '/', it will be treated as an absolute path. Consider using a more robust path joining approach that handles both leading and non-leading slash cases, similar to the pattern used in sentry/utils/http.py which does urljoin(url_prefix.rstrip("/") + "/", url.lstrip("/")).
Severity: MEDIUM

💡 Suggested Fix

target_url = urljoin(target_base_url + "/", path.lstrip("/"))
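For illustration, a quick sketch of the join behavior described above (the base URL and paths are made-up examples):

```python
from urllib.parse import urljoin

base = "http://localhost:8888/objectstore"

# rstrip("/") + urljoin replaces the last path segment of the base:
print(urljoin(base, "health"))                     # http://localhost:8888/health

# A leading "/" on the joined path replaces the base path entirely:
print(urljoin(base + "/", "/health"))              # http://localhost:8888/health

# The suggested pattern keeps the base path and appends the relative part:
print(urljoin(base + "/", "health"))               # http://localhost:8888/objectstore/health
print(urljoin(base + "/", "/health".lstrip("/")))  # http://localhost:8888/objectstore/health
```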


headers = dict(request.headers)
Comment on lines +62 to +70
The _proxy method doesn't validate or sanitize the path parameter. While the URL pattern (?P<path>.*) captures any string, there's no check to prevent path traversal attacks (e.g., ../../../etc/passwd). The objectstore backend should validate this, but the gateway should also implement defensive checks. Consider validating that the path doesn't contain suspicious patterns like .. or //.
Severity: MEDIUM


if method in ("PUT", "POST") and not headers.get("Transfer-Encoding") == "chunked":
return Response("Only Transfer-Encoding: chunked is supported", status=400)

headers.pop("Content-Length", None)
headers.pop("Transfer-Encoding", None)

body_stream = None
if method in ("PUT", "POST"):
wsgi_input = request._request.META.get("wsgi.input")
assert wsgi_input
stream_func = wsgi_input._read
body_stream = ChunkedStreamDecoder(stream_func)

response = requests.request(
method,
url=target_url,
headers=headers,
data=body_stream,
params=dict(request.GET) if request.GET else None,
stream=True,
allow_redirects=False,
)
Comment on lines +84 to +92

Check failure

Code scanning / CodeQL

Full server-side request forgery — Critical

The full URL of this request depends on a user-provided value (reported four times).

Copilot Autofix


To fix this Full SSRF vulnerability, we must prevent users from being able to set the full target of the outgoing HTTP request. The best pattern is to validate "path" so that only safe, known locations can be accessed. Generally, this means only allowing "path" to designate a relative path within a known base URL, making sure it cannot escape with e.g. leading "//", absolute URLs, etc.

A robust fix:

  1. Restrict "path" to ensure it cannot be an absolute URL, start with "//", or otherwise escape the intended target domain. This can be achieved by checking that "path" is a relative path, does not start with "http:", "https:", "//", or similar.

  2. Additionally, consider whitelisting or regular-expression filtering for allowed "path" formats, blocking traversal characters ("../", "..").

  3. Apply this check at the entrypoint to the _proxy function (or earlier). If "path" fails validation, return a 400 error.

Implementation:

  • Add a validation function (e.g., _is_safe_path) inside OrganizationObjectstoreEndpoint.
  • Use it right before constructing target_url. If validation fails, respond with a 400 error message.
  • This fix is limited to the edited region and does not change existing endpoint logic.

Suggested changeset 1
src/sentry/objectstore/endpoints/organization.py

Autofix patch — run the following command in your local git repository to apply it:
cat << 'EOF' | git apply
diff --git a/src/sentry/objectstore/endpoints/organization.py b/src/sentry/objectstore/endpoints/organization.py
--- a/src/sentry/objectstore/endpoints/organization.py
+++ b/src/sentry/objectstore/endpoints/organization.py
@@ -64,6 +64,8 @@
         path: str,
         request: Request,
     ) -> Response | StreamingHttpResponse:
+        if not self._is_safe_path(path):
+            return Response("Unsafe path argument", status=400)
         target_base_url = options.get("objectstore.config")["base_url"].rstrip("/")
         target_url = urljoin(target_base_url, path)
 
@@ -95,6 +97,24 @@
         return stream_response(response)
 
 
+    def _is_safe_path(self, path: str) -> bool:
+        """
+        Return True if the provided path is safe to join to the base URL.
+        Rejects absolute URLs and schemes, and known SSRF exploit forms.
+        """
+        # Block absolute URLs and scheme
+        unsafe_prefixes = ("http://", "https://", "ftp://", "//")
+        if any(path.startswith(prefix) for prefix in unsafe_prefixes):
+            return False
+        # Block attempts to traverse upwards (optional - for stricter control)
+        if ".." in path or path.startswith("/"):
+            return False
+        # You may also want a stricter regex for allowed characters, e.g.:
+        # import re
+        # if not re.fullmatch(r"[a-zA-Z0-9_\-/\.]+", path):
+        #     return False
+        return True
+
 class ChunkedStreamDecoder:
     """
     Decodes HTTP chunked transfer encoding on-the-fly without buffering.
EOF
response.raise_for_status()

return stream_response(response)


class ChunkedStreamDecoder:
"""
Decodes HTTP chunked transfer encoding on-the-fly without buffering.
Implements file-like interface for streaming to requests library.
"""

def __init__(self, read_func: Callable[[int], bytes]):
self._read = read_func
self._done = False
self._current_chunk_remaining = 0

def read(self, size: int = -1) -> bytes:
if self._done:
return b""

result = []
bytes_read = 0
target_size = size if size > 0 else 8192

while bytes_read < target_size:
if self._current_chunk_remaining > 0:
to_read = min(self._current_chunk_remaining, target_size - bytes_read)
chunk = self._read(to_read)
if not chunk:
self._done = True
break
result.append(chunk)
bytes_read += len(chunk)
self._current_chunk_remaining -= len(chunk)

if self._current_chunk_remaining == 0:
self._read(2) # Read trailing \r\n

Bug: ChunkedStreamDecoder ignores the return value of self._read(2) for \r\n consumption, corrupting chunk parsing on incomplete reads.
Severity: HIGH | Confidence: High

🔍 Detailed Analysis

The ChunkedStreamDecoder calls self._read(2) to consume the trailing \r\n after each chunk. However, the WSGI wsgi.input stream's read(size) method does not guarantee returning exactly size bytes. If _read(2) returns fewer than 2 bytes, the decoder ignores this, and its state is not advanced correctly. This causes subsequent chunk size parsing to become corrupted, leading to a ValueError on line 142 and stream decoding failures.

💡 Suggested Fix

The _read(2) call should verify that exactly 2 bytes were read. If fewer than 2 bytes are returned, the decoder should handle the incomplete read, possibly by retrying or raising an error, to ensure its state is correctly advanced.
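As a rough sketch of that fix (the helper name `_read_exact` is illustrative, not part of this PR), the decoder could loop over short reads until the CRLF is fully consumed:

```python
from collections.abc import Callable


class _ExactReader:
    """Sketch only: wraps the same read callable ChunkedStreamDecoder already holds."""

    def __init__(self, read_func: Callable[[int], bytes]):
        self._read = read_func

    def _read_exact(self, n: int) -> bytes:
        # Keep reading until exactly n bytes arrive; a shorter result means
        # the underlying stream ended early and the caller should treat the
        # body as truncated.
        buf = b""
        while len(buf) < n:
            piece = self._read(n - len(buf))
            if not piece:
                break
            buf += piece
        return buf
```

The decoder would then consume the trailing CRLF with `_read_exact(2)` and treat anything other than `b"\r\n"` as a malformed stream.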


else:
# Read next chunk size line
size_line = b""
while not size_line.endswith(b"\r\n"):
byte = self._read(1)
if not byte:
Comment on lines +125 to +135
In the ChunkedStreamDecoder.read() method, when reading chunk size lines byte-by-byte, if the connection is dropped mid-chunk-size (e.g., client sends 123 without \r\n), the code will loop indefinitely trying to read the next byte. The method should have a timeout or maximum iteration limit to prevent hanging. Consider adding a maximum size limit for chunk size lines (they should typically be quite small).
Severity: MEDIUM
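A minimal sketch of such a bound (the 64-byte limit is an arbitrary but generous choice; real chunk-size lines are a few hex digits plus an optional extension):

```python
from collections.abc import Callable

MAX_CHUNK_SIZE_LINE = 64  # assumed limit for illustration


def read_chunk_size_line(read: Callable[[int], bytes]) -> bytes:
    """Read one chunk-size line without looping forever on malformed input."""
    size_line = b""
    while not size_line.endswith(b"\r\n"):
        if len(size_line) >= MAX_CHUNK_SIZE_LINE:
            raise ValueError("chunk size line too long")
        byte = read(1)
        if not byte:  # stream ended mid-line
            raise ValueError("truncated chunk size line")
        size_line += byte
    return size_line
```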


self._done = True
return b"".join(result)
size_line += byte

try:
chunk_size = int(size_line.strip(), 16)
except ValueError:
self._done = True
return b"".join(result)

if chunk_size == 0:
self._read(2) # Read trailing \r\n
self._done = True
return b"".join(result)

self._current_chunk_remaining = chunk_size

return b"".join(result)


def stream_response(response: ExternalResponse) -> StreamingHttpResponse:
def stream() -> Generator[bytes]:
response.raw.decode_content = False
while True:
chunk = response.raw.read(CHUNK_SIZE)
if not chunk:
break
yield chunk

streamed_response = StreamingHttpResponse(
streaming_content=stream(),
status=response.status_code,
)

for header, value in response.headers.items():
if not is_hop_by_hop(header):
streamed_response[header] = value

return streamed_response
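Since the proxy only accepts chunked uploads, a minimal client sketch may help; the host, organization slug, object path, and token below are placeholders, and `requests` switches to `Transfer-Encoding: chunked` on its own when given a generator body:

```python
import requests

BASE = "https://sentry.example.com/api/0/organizations/my-org/objectstore/"  # placeholder
TOKEN = "<auth token>"  # placeholder


def body():
    # A generator body makes `requests` send the request with
    # Transfer-Encoding: chunked, which is the only upload mode the
    # endpoint accepts.
    yield b"first part of the payload"
    yield b"second part of the payload"


resp = requests.put(
    BASE + "some/object/path",  # hypothetical objectstore path
    data=body(),
    headers={"Authorization": f"Bearer {TOKEN}"},
)
resp.raise_for_status()
```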
2 changes: 1 addition & 1 deletion static/app/utils/api/knownSentryApiUrls.generated.ts
@@ -454,7 +454,7 @@ export type KnownSentryApiUrls =
| '/organizations/$organizationIdOrSlug/notifications/actions/'
| '/organizations/$organizationIdOrSlug/notifications/actions/$actionId/'
| '/organizations/$organizationIdOrSlug/notifications/available-actions/'
| '/organizations/$organizationIdOrSlug/objectstore/'
| '/organizations/$organizationIdOrSlug/objectstore/$path'
| '/organizations/$organizationIdOrSlug/onboarding-continuation-email/'
| '/organizations/$organizationIdOrSlug/onboarding-tasks/'
| '/organizations/$organizationIdOrSlug/ondemand-rules-stats/'
100 changes: 90 additions & 10 deletions tests/sentry/objectstore/endpoints/test_organization.py
@@ -1,23 +1,103 @@
from sentry.testutils.cases import APITestCase
import pytest
import requests
from django.urls import reverse
from objectstore_client import Client, RequestError, Session, Usecase
from pytest_django.live_server_helper import LiveServer

from sentry.testutils.cases import TransactionTestCase
from sentry.testutils.helpers.features import with_feature
from sentry.testutils.silo import region_silo_test
from sentry.testutils.skips import requires_objectstore


@pytest.fixture(scope="function")
def local_live_server(request: pytest.FixtureRequest, live_server: LiveServer) -> None:
if hasattr(request, "cls"):
request.cls.live_server = live_server
request.node.live_server = live_server


@region_silo_test
class OrganizationObjectstoreEndpointTest(APITestCase):
@requires_objectstore
@pytest.mark.usefixtures("local_live_server")
class OrganizationObjectstoreEndpointTest(TransactionTestCase):
endpoint = "sentry-api-0-organization-objectstore"
live_server: LiveServer

def setUp(self) -> None:
super().setUp()
self.login_as(user=self.user)
self.organization = self.create_organization(owner=self.user)
self.api_key = self.create_api_key(
organization=self.organization,
scope_list=["org:admin"],
)

def test_feature_flag_disabled(self):
"""Without feature flag, returns 404"""
response = self.get_response(self.organization.slug)
assert response.status_code == 404
def get_endpoint_url(self) -> str:
path = reverse(
self.endpoint,
kwargs={
"organization_id_or_slug": self.organization.id,
"path": "",
},
)
return f"{self.live_server.url}{path}"

def get_auth_headers(self) -> dict[str, str]:
auth_header = self.create_basic_auth_header(self.api_key.key)
return {"Authorization": auth_header.decode()}

def get_session(self) -> Session:
client = Client(
self.get_endpoint_url(), connection_kwargs={"headers": self.get_auth_headers()}
)
session = client.session(Usecase("test"), org=self.organization.id)
return session

@with_feature("organizations:objectstore-endpoint")
def test_feature_flag_enabled(self):
"""With feature flag, endpoint is accessible"""
response = self.get_response(self.organization.slug)
assert response.status_code == 200
def test_health(self):
url = self.get_endpoint_url() + "health"
res = requests.get(url, headers=self.get_auth_headers())
res.raise_for_status()

@with_feature("organizations:objectstore-endpoint")
def test_full_cycle(self):
session = self.get_session()

object_key = session.put(b"test data")
assert object_key is not None

retrieved = session.get(object_key)
assert retrieved.payload.read() == b"test data"

new_key = session.put(b"new data", key=object_key)
assert new_key == object_key

retrieved = session.get(object_key)
assert retrieved.payload.read() == b"new data"

session.delete(object_key)

with pytest.raises(RequestError):
session.get(object_key)

@with_feature("organizations:objectstore-endpoint")
def test_uncompressed(self):
session = self.get_session()

object_key = session.put(b"test data", compression="none")
assert object_key is not None

retrieved = session.get(object_key)
assert retrieved.payload.read() == b"test data"

@with_feature("organizations:objectstore-endpoint")
def test_large_payload(self):
session = self.get_session()
data = b"A" * 1_000_000

object_key = session.put(data)
assert object_key is not None

retrieved = session.get(object_key)
assert retrieved.payload.read() == data