Skip to content

Commit f29a346

Browse files
authored
fix(backup): Fix various small import and comparison bugs (#60593)
For this PR, we fix the following: 1. No longer crash when comparing `None` with a nonexistent field. Previously, this behavior was correct when comparing either of a `None` or missing field to itself, or a present field, but not when comparing with one another. 2. Remove existence check for `IgnoredComparator`. This makes no sense: if we are ignoring the field for comparison purposes, we should not raise findings when one side is missing. 3. Ignore country and/or region code re-assignments. Since we are accepting whatever codes are assigned by the geo IP handler on the receiving side, we should not raise alerts if this modules comes to a different result than the exporter. Instead, we should quietly accept the new value, and ignore the fields during comparison. 4. Allow null `expires_at` and `refresh_token` for the `ApiToken` model. Previously, we auto-assigned these fields. But we've since encountered instances where these are left null on purpose, so we should not mutate this apparently valid persisted state at import time. 5. Handle unordered list fields when comparing. This is relevant specifically to the `scope_list` field of `ApiToken`, which seems to get the order of members shuffled a bit.
1 parent 517ce11 commit f29a346

File tree

6 files changed

+324
-45
lines changed

6 files changed

+324
-45
lines changed

src/sentry/backup/comparators.py

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919

2020

2121
class ScrubbedData:
22-
"""A singleton class used to indicate data has been scrubbed, without indicating what that data is. A unit type indicating "scrubbing was successful" only."""
22+
"""A singleton class used to indicate data has been scrubbed, without indicating what that data
23+
is. A unit type indicating "scrubbing was successful" only."""
2324

2425
instance: ScrubbedData
2526

@@ -70,9 +71,11 @@ def existence(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Com
7071

7172
findings = []
7273
for f in self.fields:
73-
if f not in left["fields"] and f not in right["fields"]:
74+
missing_on_left = f not in left["fields"] or left["fields"][f] is None
75+
missing_on_right = f not in right["fields"] or right["fields"][f] is None
76+
if missing_on_left and missing_on_right:
7477
continue
75-
if f not in left["fields"]:
78+
if missing_on_left:
7679
findings.append(
7780
ComparatorFinding(
7881
kind=self.get_kind_existence_check(),
@@ -82,7 +85,7 @@ def existence(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Com
8285
reason=f"the left `{f}` value was missing",
8386
)
8487
)
85-
if f not in right["fields"]:
88+
if missing_on_right:
8689
findings.append(
8790
ComparatorFinding(
8891
kind=self.get_kind_existence_check(),
@@ -461,13 +464,20 @@ def truncate(self, data: list[str]) -> list[str]:
461464
class IgnoredComparator(JSONScrubbingComparator):
462465
"""Ensures that two fields are tested for mutual existence, and nothing else.
463466
464-
Using this class means that you are foregoing comparing the relevant field(s), so please make sure you are validating them some other way!"""
467+
Using this class means that you are foregoing comparing the relevant field(s), so please make
468+
sure you are validating them some other way!
469+
"""
465470

466-
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
471+
def compare(self, _o: InstanceID, _l: JSONData, _r: JSONData) -> list[ComparatorFinding]:
467472
"""Noop - there is nothing to compare once we've checked for existence."""
468473

469474
return []
470475

476+
def existence(self, _o: InstanceID, _l: JSONData, _r: JSONData) -> list[ComparatorFinding]:
477+
"""Noop - never compare existence for ignored fields, they're ignored after all."""
478+
479+
return []
480+
471481

472482
class RegexComparator(JSONScrubbingComparator, ABC):
473483
"""Comparator that ensures that both sides match a certain regex."""
@@ -517,7 +527,8 @@ def __init__(self, bytes: int, *fields: str):
517527

518528

519529
class SubscriptionIDComparator(RegexComparator):
520-
"""Compare the basic format of `QuerySubscription` IDs, which is basically a UUID1 with a numeric prefix. Ensure that the two values are NOT equivalent."""
530+
"""Compare the basic format of `QuerySubscription` IDs, which is basically a UUID1 with a
531+
numeric prefix. Ensure that the two values are NOT equivalent."""
521532

522533
def __init__(self, *fields: str):
523534
super().__init__(re.compile("^\\d+/[0-9a-f]{32}$"), *fields)
@@ -549,11 +560,38 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
549560
return findings
550561

551562

563+
class UnorderedListComparator(JSONScrubbingComparator):
564+
"""Comparator for fields that are lists of unordered elements, which simply orders them before
565+
doing the comparison."""
566+
567+
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
568+
findings = []
569+
fields = sorted(self.fields)
570+
for f in fields:
571+
if left["fields"].get(f) is None and right["fields"].get(f) is None:
572+
continue
573+
574+
lv = left["fields"][f] or []
575+
rv = right["fields"][f] or []
576+
if sorted(lv) != sorted(rv):
577+
findings.append(
578+
ComparatorFinding(
579+
kind=self.get_kind(),
580+
on=on,
581+
left_pk=left["pk"],
582+
right_pk=right["pk"],
583+
reason=f"""the left value ({lv}) of the unordered list field `{f}` was not equal to the right value ({rv})""",
584+
)
585+
)
586+
return findings
587+
588+
552589
# Note: we could also use the `uuid` Python uuid module for this, but it is finicky and accepts some
553590
# weird syntactic variations that are not very common and may cause weird failures when they are
554591
# rejected elsewhere.
555592
class UUID4Comparator(RegexComparator):
556-
"""UUIDs must be regenerated on import (otherwise they would not be unique...). This comparator ensures that they retain their validity, but are not equivalent."""
593+
"""UUIDs must be regenerated on import (otherwise they would not be unique...). This comparator
594+
ensures that they retain their validity, but are not equivalent."""
557595

558596
def __init__(self, *fields: str):
559597
super().__init__(
@@ -590,7 +628,8 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
590628

591629

592630
def auto_assign_datetime_equality_comparators(comps: ComparatorMap) -> None:
593-
"""Automatically assigns the DateAddedComparator to any `DateTimeField` that is not already claimed by the `DateUpdatedComparator`."""
631+
"""Automatically assigns the DateAddedComparator to any `DateTimeField` that is not already
632+
claimed by the `DateUpdatedComparator`."""
594633

595634
exportable = get_exportable_sentry_models()
596635
for e in exportable:
@@ -616,7 +655,8 @@ def auto_assign_datetime_equality_comparators(comps: ComparatorMap) -> None:
616655

617656

618657
def auto_assign_email_obfuscating_comparators(comps: ComparatorMap) -> None:
619-
"""Automatically assigns the EmailObfuscatingComparator to any field that is an `EmailField` or has a foreign key into the `sentry.User` table."""
658+
"""Automatically assigns the EmailObfuscatingComparator to any field that is an `EmailField` or
659+
has a foreign key into the `sentry.User` table."""
620660

621661
exportable = get_exportable_sentry_models()
622662
for e in exportable:
@@ -667,7 +707,8 @@ def get_default_comparators():
667707
list,
668708
{
669709
"sentry.apitoken": [
670-
HashObfuscatingComparator("refresh_token", "token", "token_last_characters")
710+
HashObfuscatingComparator("refresh_token", "token", "token_last_characters"),
711+
UnorderedListComparator("scope_list"),
671712
],
672713
"sentry.apiapplication": [HashObfuscatingComparator("client_id", "client_secret")],
673714
"sentry.authidentity": [HashObfuscatingComparator("ident", "token")],
@@ -716,7 +757,13 @@ def get_default_comparators():
716757
DateUpdatedComparator("date_hash_added"),
717758
IgnoredComparator("validation_hash", "is_verified"),
718759
],
719-
"sentry.userip": [DateUpdatedComparator("first_seen", "last_seen")],
760+
"sentry.userip": [
761+
DateUpdatedComparator("first_seen", "last_seen"),
762+
# Incorrect country and region codes may be updated during an import, so we don't
763+
# want to compare them explicitly. This update is pulled from the geo IP service, so
764+
# we only really want to compare the IP address itself.
765+
IgnoredComparator("country_code", "region_code"),
766+
],
720767
"sentry.userrole": [DateUpdatedComparator("date_updated")],
721768
"sentry.userroleuser": [DateUpdatedComparator("date_updated")],
722769
},

src/sentry/backup/findings.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,6 @@ class ComparatorFindingKind(FindingKind):
9292
# Failed to compare an ignored field.
9393
IgnoredComparator = auto()
9494

95-
# Failed to compare an ignored field because one of the fields being compared was not present or
96-
# `None`.
97-
IgnoredComparatorExistenceCheck = auto()
98-
9995
# Secret token fields did not match their regex specification.
10096
SecretHexComparator = auto()
10197

@@ -110,6 +106,13 @@ class ComparatorFindingKind(FindingKind):
110106
# present or `None`.
111107
SubscriptionIDComparatorExistenceCheck = auto()
112108

109+
# Unordered list fields did not match.
110+
UnorderedListComparator = auto()
111+
112+
# Failed to compare a unordered list field because one of the fields being compared was not
113+
# present or `None`.
114+
UnorderedListComparatorExistenceCheck = auto()
115+
113116
# UUID4 fields did not match their regex specification.
114117
UUID4Comparator = auto()
115118

src/sentry/models/apitoken.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,11 @@ def write_relocation_import(
120120
query = models.Q(token=self.token) | models.Q(refresh_token=self.refresh_token)
121121
existing = self.__class__.objects.filter(query).first()
122122
if existing:
123-
self.expires_at = timezone.now() + DEFAULT_EXPIRATION
124123
self.token = generate_token()
125-
self.refresh_token = generate_token()
124+
if self.refresh_token is not None:
125+
self.refresh_token = generate_token()
126+
if self.expires_at is not None:
127+
self.expires_at = timezone.now() + DEFAULT_EXPIRATION
126128

127129
return super().write_relocation_import(scope, flags)
128130

src/sentry/runner/commands/backup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def get_encryptor_from_flags(
172172

173173
def write_findings(findings_file: TextIO | None, findings: Sequence[Finding], printer: Callable):
174174
for f in findings:
175-
printer(f.pretty(), err=True)
175+
printer(f"\n\n{f.pretty()}", err=True)
176176

177177
if findings_file:
178178
findings_encoder = FindingJSONEncoder(
@@ -310,11 +310,11 @@ def load_data(
310310

311311
res = validate(left_data, right_data, get_default_comparators())
312312
if res:
313+
click.echo(f"\n\nDone, found {len(res.findings)} differences:")
313314
write_findings(findings_file, res.findings, click.echo)
314-
click.echo(f"Done, found {len(res.findings)} differences:\n\n{res.pretty()}")
315315
else:
316+
click.echo("\n\nDone, found 0 differences!")
316317
write_findings(findings_file, [], click.echo)
317-
click.echo("Done, found 0 differences!")
318318

319319

320320
@backup.command(name="decrypt")

0 commit comments

Comments
 (0)