Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 19 additions & 20 deletions src/sentry/api/endpoints/organization_trace_item_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from sentry.search.eap.constants import SUPPORTED_STATS_TYPES
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.spans.attributes import (
SPAN_ATTRIBUTE_DEFINITIONS,
SPANS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS,
SPANS_STATS_EXCLUDED_ATTRIBUTES_PUBLIC_ALIAS,
)
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
Expand Down Expand Up @@ -53,8 +53,8 @@ def get_result(self, limit, cursor=None):

offset = cursor.offset if cursor is not None else 0
# Request 1 more than limit so we can tell if there is another page
data = self.data_fn(offset=offset, limit=limit + 1)
has_more = data[1] >= limit + 1
data = self.data_fn(offset=offset, limit=limit)
has_more = data[1] >= offset + limit + 1

return CursorResult(
data,
Expand Down Expand Up @@ -145,6 +145,7 @@ def run_stats_query_with_span_ids(span_id_filter):
config=resolver_config,
search_resolver=resolver,
max_buckets=1,
skip_translate_internal_to_public_alias=True,
)

def run_stats_query_with_error_handling(attributes):
Expand All @@ -169,21 +170,27 @@ def data_fn(offset: int, limit: int):

preflight_stats = run_stats_query_with_span_ids(f"id:[{span_id_list}]")
try:
attr_keys = list(preflight_stats[0]["attribute_distributions"]["data"].keys())
internal_alias_attr_keys = list(
preflight_stats[0]["attribute_distributions"]["data"].keys()
)
except (IndexError, KeyError):
return {"data": []}, 0

sanitized_keys = []
for key in attr_keys:
if key in SPANS_STATS_EXCLUDED_ATTRIBUTES_PUBLIC_ALIAS:
for internal_name in internal_alias_attr_keys:
public_alias = SPANS_INTERNAL_TO_PUBLIC_ALIAS_MAPPINGS.get("string", {}).get(
internal_name, internal_name
)

if public_alias in SPANS_STATS_EXCLUDED_ATTRIBUTES_PUBLIC_ALIAS:
continue

if value_substring_match:
if value_substring_match in key:
sanitized_keys.append(key)
if value_substring_match in public_alias:
sanitized_keys.append(internal_name)
continue

sanitized_keys.append(key)
sanitized_keys.append(internal_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deduplication tracks internal names instead of public aliases

Medium Severity

The deduplication logic adds internal_name to sanitized_keys_set and checks against it, but internal_alias_attr_keys comes from dictionary keys which are already unique. The PR's goal is to deduplicate when different internal names map to the same public_alias, but the set tracks internal_name instead of public_alias. This means multiple internal names mapping to the same public alias will all be included, defeating the deduplication purpose.

Fix in Cursor Fix in Web


sanitized_keys = sanitized_keys[offset : offset + limit]

Expand All @@ -192,17 +199,9 @@ def data_fn(offset: int, limit: int):

request_attrs_list = []
for requested_key in sanitized_keys:
if requested_key in SPAN_ATTRIBUTE_DEFINITIONS:
request_attrs_list.append(
AttributeKey(
name=SPAN_ATTRIBUTE_DEFINITIONS[requested_key].internal_name,
type=AttributeKey.TYPE_STRING,
)
)
else:
request_attrs_list.append(
AttributeKey(name=requested_key, type=AttributeKey.TYPE_STRING)
)
request_attrs_list.append(
AttributeKey(name=requested_key, type=AttributeKey.TYPE_STRING)
)

chunked_attributes: defaultdict[int, list[AttributeKey]] = defaultdict(list)
for i, attr in enumerate(request_attrs_list):
Expand Down
18 changes: 13 additions & 5 deletions src/sentry/snuba/spans_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def run_stats_query(
search_resolver: SearchResolver | None = None,
attributes: list[AttributeKey] | None = None,
max_buckets: int = 75,
skip_translate_internal_to_public_alias: bool = False,
) -> list[dict[str, Any]]:
search_resolver = search_resolver or cls.get_resolver(params, config)
stats_filter, _, _ = search_resolver.resolve_query(query_string)
Expand Down Expand Up @@ -344,11 +345,18 @@ def run_stats_query(
continue

for bucket in attribute.buckets:
public_alias, _, _ = translate_internal_to_public_alias(
attribute.attribute_name, "string", SupportedTraceItemType.SPANS
)
public_alias = public_alias or attribute.attribute_name
attrs[public_alias].append({"label": bucket.label, "value": bucket.value})
if skip_translate_internal_to_public_alias:
attrs[attribute.attribute_name].append(
{"label": bucket.label, "value": bucket.value}
)
else:
public_alias, _, _ = translate_internal_to_public_alias(
attribute.attribute_name, "string", SupportedTraceItemType.SPANS
)
public_alias = public_alias or attribute.attribute_name
attrs[public_alias].append(
{"label": bucket.label, "value": bucket.value}
)
stats.append({"attribute_distributions": {"data": attrs}})

return stats
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,7 @@ def test_custom_limit_parameter(self) -> None:

assert len(response.data["data"]) == 1
attribute_distribution = response.data["data"][0]["attribute_distributions"]["data"]
assert (
len(attribute_distribution) == 2
) # We return limit + 1 to check if there is a next page
assert len(attribute_distribution) == 1

if "Link" in response:
links = {}
Expand Down
Loading