Skip to content
This repository was archived by the owner on Jun 13, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
c8685db
Update `shared` and remove statsd metrics usage
Swatinem Oct 22, 2024
52dd85c
remove usage of mock_metrics
Swatinem Oct 22, 2024
2909aa6
feat: add Prometheus metrics for uploads
lvthanh03 Oct 22, 2024
91a8578
Replace GQL Sentry Metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
ac58dbf
validate: Replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
6d72497
upload: Modify generate metrics labels helper function
lvthanh03 Oct 22, 2024
5af7369
commits: replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
af3f0f0
Add fill_labels=False to avoid redundant labels
lvthanh03 Oct 22, 2024
4c79e54
empty_upload: Replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
cfbe611
legacy upload: replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
9fe6d4b
create_upload: replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
66f02a9
upload_complete: replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
3f86b59
test_results: replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
e9ef0b4
reports: replace Sentry metrics with Prometheus metrics
lvthanh03 Oct 22, 2024
9ff1ba8
Fix incr() and inc() typos
lvthanh03 Oct 22, 2024
79d6d52
Merge branch 'main' into tony/prometheus-metrics
lvthanh03 Oct 22, 2024
2dfa87b
fix: Lints
lvthanh03 Oct 22, 2024
2e71531
More syntax and lint fixes
lvthanh03 Oct 22, 2024
eaf9a41
helpers: prometheus metrics labels function linting fix
lvthanh03 Oct 22, 2024
4d2c20e
reports: Make mock metrics name consistent
lvthanh03 Oct 22, 2024
61865f0
Use inc_counter in shared.metrics
lvthanh03 Oct 23, 2024
27f74ad
bundle_analysis: remove try block
lvthanh03 Oct 23, 2024
00bd746
Merge branch 'main' into tony/prometheus-metrics
lvthanh03 Oct 23, 2024
13086fd
Function name clarity ("labels" instead of "tags")
lvthanh03 Oct 23, 2024
7077928
fix: Lints
lvthanh03 Oct 23, 2024
08b9256
Bump shared version
lvthanh03 Oct 23, 2024
44b2c12
Merge remote-tracking branch 'origin/swatinem/update-shared' into ton…
lvthanh03 Oct 23, 2024
3ad8aff
Remove self.test_instances[0].test.failure_rate
lvthanh03 Oct 23, 2024
bb7757a
Remove commits_where_fail
lvthanh03 Oct 23, 2024
e222298
Merge branch 'main' into tony/prometheus-metrics
lvthanh03 Oct 24, 2024
e052728
Merge branch 'main' into tony/prometheus-metrics
lvthanh03 Oct 25, 2024
2386132
Merge branch 'main' into tony/prometheus-metrics
lvthanh03 Oct 28, 2024
919ee71
Use include_empty_labels for extra clarity
lvthanh03 Oct 28, 2024
c691c6e
Merge branch 'main' into tony/prometheus-metrics
lvthanh03 Oct 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions graphql_api/tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from unittest.mock import Mock, call, patch
from unittest.mock import Mock, patch

from ariadne import ObjectType, make_executable_schema
from ariadne.validation import cost_directive
Expand Down Expand Up @@ -201,11 +201,12 @@ async def test_query_metrics_extension_set_type_and_name_timeout(
assert extension.operation_type == "unknown_type"
assert extension.operation_name == "unknown_name"

@patch("sentry_sdk.metrics.incr")
@patch("graphql_api.views.GQL_REQUEST_MADE_COUNTER.labels")
@patch("graphql_api.views.GQL_ERROR_TYPE_COUNTER.labels")
@patch("graphql_api.views.AsyncGraphqlView._check_ratelimit")
@override_settings(DEBUG=False, GRAPHQL_RATE_LIMIT_RPM=1000)
async def test_when_rate_limit_reached(
self, mocked_check_ratelimit, mocked_sentry_incr
self, mocked_check_ratelimit, mocked_error_counter, mocked_request_counter
):
schema = generate_cost_test_schema()
mocked_check_ratelimit.return_value = True
Expand All @@ -217,11 +218,10 @@ async def test_when_rate_limit_reached(
== "It looks like you've hit the rate limit of 1000 req/min. Try again later."
)

expected_calls = [
call("graphql.info.request_made", tags={"path": "/graphql/gh"}),
call("graphql.error.rate_limit", tags={"path": "/graphql/gh"}),
]
mocked_sentry_incr.assert_has_calls(expected_calls)
mocked_error_counter.assert_called_with(
error_type="rate_limit", path="/graphql/gh"
)
mocked_request_counter.assert_called_with(path="/graphql/gh")

@override_settings(
DEBUG=False, GRAPHQL_RATE_LIMIT_RPM=0, GRAPHQL_RATE_LIMIT_ENABLED=False
Expand Down
46 changes: 34 additions & 12 deletions graphql_api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
)
from graphql import DocumentNode
from sentry_sdk import capture_exception
from sentry_sdk import metrics as sentry_metrics
from shared.metrics import Counter, Histogram
from shared.metrics import Counter, Histogram, inc_counter

from codecov.commands.exceptions import BaseException
from codecov.commands.executor import get_executor_from_request
Expand Down Expand Up @@ -51,6 +50,17 @@
buckets=[0.05, 0.1, 0.25, 0.5, 0.75, 1, 2, 5, 10, 30],
)

GQL_REQUEST_MADE_COUNTER = Counter(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't quite tell - what's the intended difference between this one (GQL_REQUEST_MADE_COUNTER) and GQL_HIT_COUNTER in above (line 34)? Any opportunities to combine?

Also a nit but it seems like we use "Total" in the description when it's a Histogram not Counter

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added GQL_REQUEST_MADE_COUNTER so that the post function in AsyncGraphqlView class can increment this metric with the label path=req_path, and the other metric GQL_HIT_COUNTER is called when the actual gql API is called through the request_started extension: https://ariadnegraphql.org/docs/extensions. If we want to combine this, then we would have to give req_path to the QueryMetricsExtension in some way, I was thinking of setting a self.path attribute to the AsyncGraphqlView class but since it's handling many async requests I don't think that would work. Do you know of a way to implement this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe path is available in the context argument like info.context["request"].url.path, if that's what you mean?

If this will be a duplicate of the information in GQL_HIT_COUNTER then it seems like removing/combining makes sense. But since it sounds like it's meant to be another step in the lifecycle of a request, we can use it for debugging, for example comparing numbers against the GQL_REQUEST_MADE_COUNTER. Actually I like the location of the GQL_REQUEST_MADE_COUNTER better than the other one that already existed, so we can just leave it! I saw you were just converting the sentry one to prometheus anyway so we can just proceed.

"api_gql_requests_made",
"Total API GQL requests made",
["path"],
)

GQL_ERROR_TYPE_COUNTER = Counter(
"api_gql_errors",
"Number of times API GQL endpoint failed with an exception by type",
["error_type", "path"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we include a comment on when to use existing GQL_ERROR_COUNTER vs. new GQL_ERROR_TYPE_COUNTER for a particular use case? Or is this something we could potentially combine?

)

# covers named and 3 unnamed operations (see graphql_api/types/query/query.py)
GQL_TYPE_AND_NAME_PATTERN = r"^(query|mutation|subscription)(?:\(\$input:|) (\w+)(?:\(| \(|{| {|!)|^(?:{) (me|owner|config)(?:\(| |{)"
Expand Down Expand Up @@ -109,9 +119,13 @@ def request_started(self, context):
"""
self.set_type_and_name(query=context["clean_query"])
self.start_timestamp = time.perf_counter()
GQL_HIT_COUNTER.labels(
operation_type=self.operation_type, operation_name=self.operation_name
).inc()
inc_counter(
GQL_HIT_COUNTER,
labels=dict(
operation_type=self.operation_type,
operation_name=self.operation_name,
),
)

def request_finished(self, context):
"""
Expand Down Expand Up @@ -226,10 +240,12 @@ async def post(self, request, *args, **kwargs):
"user": request.user,
}
log.info("GraphQL Request", extra=log_data)
sentry_metrics.incr("graphql.info.request_made", tags={"path": req_path})

inc_counter(GQL_REQUEST_MADE_COUNTER, labels=dict(path=req_path))
if self._check_ratelimit(request=request):
sentry_metrics.incr("graphql.error.rate_limit", tags={"path": req_path})
inc_counter(
GQL_ERROR_TYPE_COUNTER,
labels=dict(error_type="rate_limit", path=req_path),
)
return JsonResponse(
data={
"status": 429,
Expand All @@ -245,7 +261,10 @@ async def post(self, request, *args, **kwargs):
data = json.loads(content)

if "errors" in data:
sentry_metrics.incr("graphql.error.all", tags={"path": req_path})
inc_counter(
GQL_ERROR_TYPE_COUNTER,
labels=dict(error_type="all", path=req_path),
)
try:
if data["errors"][0]["extensions"]["cost"]:
costs = data["errors"][0]["extensions"]["cost"]
Expand All @@ -257,9 +276,12 @@ async def post(self, request, *args, **kwargs):
request_body=req_body,
),
)
sentry_metrics.incr(
"graphql.error.query_cost_exceeded",
tags={"path": req_path},
inc_counter(
GQL_ERROR_TYPE_COUNTER,
labels=dict(
error_type="query_cost_exceeded",
path=req_path,
),
)
return HttpResponseBadRequest(
JsonResponse("Your query is too costly.")
Expand Down
27 changes: 19 additions & 8 deletions upload/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,14 +790,15 @@ def get_version_from_headers(headers):
return "unknown-user-agent"


def generate_upload_sentry_metrics_tags(
def generate_upload_prometheus_metrics_labels(
action,
request,
is_shelter_request,
endpoint: Optional[str] = None,
repository: Optional[Repository] = None,
position: Optional[str] = None,
upload_version: Optional[str] = None,
include_empty_labels: bool = True,
):
metrics_tags = dict(
agent=get_agent_from_headers(request.headers),
Expand All @@ -806,13 +807,23 @@ def generate_upload_sentry_metrics_tags(
endpoint=endpoint,
is_using_shelter="yes" if is_shelter_request else "no",
)

repo_visibility = None
if repository:
metrics_tags["repo_visibility"] = (
"private" if repository.private is True else "public"
)
if position:
metrics_tags["position"] = position
if upload_version:
metrics_tags["upload_version"] = upload_version
repo_visibility = "private" if repository.private else "public"

optional_fields = {
"repo_visibility": repo_visibility,
"position": position,
"upload_version": upload_version,
}
Comment on lines +815 to +819
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code doesn't look like it's doing anything different from the original code, and the original code is a bit simpler. What is the reason to create an optional_fields dict here?

Copy link
Author

@ghost ghost Oct 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the original code, the optional_fields are not created if we don't provide them in the arguments, and python's prometheus-client errors if we initialize a metric with N labels, but call them with some empty labels. This change allows it to create these fields that are filled with None such that they are present in the labels dict.

The original code does not create the fields if the corresponding arguments are not provided. However, the prometheus-client in Python raises an error if we initialize a metric with a certain number of labels but then call it with some empty labels. I introduced optional_fields dict here to ensure that these fields are always present in the labels dictionary, even if they are filled with None.

i.e. old code, generate_tags(..., endpoint="some_endpoint") returns:

{
   ...
   endpoint: "some_endpoint",
}

in the new code, it will return

{
   ...
   endpoint: "some_endpoint",
   repo_visibility: "None",
   position: "None",
   upload_version: "None",
}

If we still want the first dict to be returned, we can call generate tags with fill_labels=False.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. I think that the new code will actually return

{
   ...
   endpoint: "some_endpoint",
   repo_visibility: None,
   position: None,
   upload_version: None,
}

where the values for the three extra fields are of None type (not string). Is this still fine?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked and that's fine, in prometheus it will interpret it as a string "None".


metrics_tags.update(
{
field: value
for field, value in optional_fields.items()
if value or include_empty_labels
}
)

return metrics_tags
16 changes: 16 additions & 0 deletions upload/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from shared.metrics import Counter

API_UPLOAD_COUNTER = Counter(
"api_upload",
"Total API upload endpoint requests",
[
"agent",
"version",
"action",
"endpoint",
"is_using_shelter",
"repo_visibility",
"position",
"upload_version",
],
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I like this pattern of pulling it out into a separate file!
I see the other spots in this PR are following the existing pattern of keeping it in the views.py file because it would be a bigger lift to move those around, but this does seem like a nicer system going forward where possible!

8 changes: 4 additions & 4 deletions upload/tests/views/test_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def test_commit_github_oidc_auth(mock_jwks_client, mock_jwt_decode, db, mocker):
private=False, author__username="codecov", name="the_repo"
)
mocked_call = mocker.patch.object(TaskService, "update_commit")
mock_sentry_metrics = mocker.patch("upload.views.commits.sentry_metrics.incr")
mock_prometheus_metrics = mocker.patch("upload.metrics.API_UPLOAD_COUNTER.labels")
mock_jwt_decode.return_value = {
"repository": f"url/{repository.name}",
"repository_owner": repository.author.username,
Expand Down Expand Up @@ -374,15 +374,15 @@ def test_commit_github_oidc_auth(mock_jwks_client, mock_jwt_decode, db, mocker):
}
assert expected_response == response_json
mocked_call.assert_called_with(commitid="commit_sha", repoid=repository.repoid)
mock_sentry_metrics.assert_called_with(
"upload",
tags={
mock_prometheus_metrics.assert_called_with(
**{
"agent": "cli",
"version": "0.4.7",
"action": "coverage",
"endpoint": "create_commit",
"repo_visibility": "public",
"is_using_shelter": "no",
"position": "end",
"upload_version": None,
},
)
8 changes: 4 additions & 4 deletions upload/tests/views/test_empty_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_empty_upload_with_yaml_ignored_files(
mocker.patch.object(
CanDoCoverageUploadsPermission, "has_permission", return_value=True
)
mock_sentry_metrics = mocker.patch("upload.views.empty_upload.sentry_metrics.incr")
mock_prometheus_metrics = mocker.patch("upload.metrics.API_UPLOAD_COUNTER.labels")
mock_final_yaml.return_value = UserYaml(
{
"ignore": [
Expand Down Expand Up @@ -93,16 +93,16 @@ def test_empty_upload_with_yaml_ignored_files(
notify_mock.assert_called_once_with(
repoid=repository.repoid, commitid=commit.commitid, empty_upload="pass"
)
mock_sentry_metrics.assert_called_with(
"upload",
tags={
mock_prometheus_metrics.assert_called_with(
**{
"agent": "cli",
"version": "0.4.7",
"action": "coverage",
"endpoint": "empty_upload",
"repo_visibility": "private",
"is_using_shelter": "no",
"position": "end",
"upload_version": None,
},
)

Expand Down
16 changes: 8 additions & 8 deletions upload/tests/views/test_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_reports_get_not_allowed(client, mocker, db):

def test_reports_post(client, db, mocker):
mocked_call = mocker.patch.object(TaskService, "preprocess_upload")
mock_sentry_metrics = mocker.patch("upload.views.reports.sentry_metrics.incr")
mock_prometheus_metrics = mocker.patch("upload.metrics.API_UPLOAD_COUNTER.labels")
repository = RepositoryFactory(
name="the_repo", author__username="codecov", author__service="github"
)
Expand All @@ -65,16 +65,16 @@ def test_reports_post(client, db, mocker):
commit_id=commit.id, code="code1", report_type=CommitReport.ReportType.COVERAGE
).exists()
mocked_call.assert_called_with(repository.repoid, commit.commitid, "code1")
mock_sentry_metrics.assert_called_with(
"upload",
tags={
mock_prometheus_metrics.assert_called_with(
**{
"agent": "cli",
"version": "0.4.7",
"action": "coverage",
"endpoint": "create_report",
"repo_visibility": "private",
"is_using_shelter": "no",
"position": "end",
"upload_version": None,
},
)

Expand Down Expand Up @@ -343,7 +343,7 @@ def test_reports_results_post_successful_github_oidc_auth(
mock_jwks_client, mock_jwt_decode, client, db, mocker
):
mocked_task = mocker.patch("services.task.TaskService.create_report_results")
mock_sentry_metrics = mocker.patch("upload.views.reports.sentry_metrics.incr")
mock_prometheus_metrics = mocker.patch("upload.metrics.API_UPLOAD_COUNTER.labels")
mocker.patch.object(
CanDoCoverageUploadsPermission, "has_permission", return_value=True
)
Expand Down Expand Up @@ -383,16 +383,16 @@ def test_reports_results_post_successful_github_oidc_auth(
report_id=commit_report.id,
).exists()
mocked_task.assert_called_once()
mock_sentry_metrics.assert_called_with(
"upload",
tags={
mock_prometheus_metrics.assert_called_with(
**{
"agent": "cli",
"version": "0.4.7",
"action": "coverage",
"endpoint": "create_report_results",
"repo_visibility": "private",
"is_using_shelter": "no",
"position": "end",
"upload_version": None,
},
)

Expand Down
8 changes: 4 additions & 4 deletions upload/tests/views/test_test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

def test_upload_test_results(db, client, mocker, mock_redis):
upload = mocker.patch.object(TaskService, "upload")
mock_sentry_metrics = mocker.patch("upload.views.test_results.metrics.incr")
mock_prometheus_metrics = mocker.patch("upload.metrics.API_UPLOAD_COUNTER.labels")
create_presigned_put = mocker.patch(
"services.archive.StorageService.create_presigned_put",
return_value="test-presigned-put",
Expand Down Expand Up @@ -100,16 +100,16 @@ def test_upload_test_results(db, client, mocker, mock_redis):
report_code=None,
report_type="test_results",
)
mock_sentry_metrics.assert_called_with(
"upload",
tags={
mock_prometheus_metrics.assert_called_with(
**{
"agent": "cli",
"version": "0.4.7",
"action": "test_results",
"endpoint": "test_results",
"repo_visibility": "private",
"is_using_shelter": "no",
"position": "end",
"upload_version": None,
},
)

Expand Down
10 changes: 4 additions & 6 deletions upload/tests/views/test_upload_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ def test_upload_completion_view_processed_uploads(mocked_manual_trigger, db, moc
mocker.patch.object(
CanDoCoverageUploadsPermission, "has_permission", return_value=True
)
mock_sentry_metrics = mocker.patch(
"upload.views.upload_completion.sentry_metrics.incr"
)
mock_prometheus_metrics = mocker.patch("upload.metrics.API_UPLOAD_COUNTER.labels")
repository = RepositoryFactory(
name="the_repo", author__username="codecov", author__service="github"
)
Expand Down Expand Up @@ -88,16 +86,16 @@ def test_upload_completion_view_processed_uploads(mocked_manual_trigger, db, moc
"uploads_error": 0,
}
mocked_manual_trigger.assert_called_once_with(repository.repoid, commit.commitid)
mock_sentry_metrics.assert_called_with(
"upload",
tags={
mock_prometheus_metrics.assert_called_with(
**{
"agent": "cli",
"version": "0.4.7",
"action": "coverage",
"endpoint": "upload_complete",
"repo_visibility": "private",
"is_using_shelter": "no",
"position": "end",
"upload_version": None,
},
)

Expand Down
Loading
Loading