Skip to content

Commit 11011d4

Browse files
authored
feat(backup): Support foreign key remapping (#54610)
When importing into a database that is not entirely flushed, sequences and all, we run into a problem: the foreign keys in the source JSON file will not match the primary keys of the newly `INSERT`ed models we will be importing. This will cause downstream imports that depend on upstream imports to fail. To resolve this, we maintain a `PrimaryKeyMap` of old pks to new pks for every model. As we import models, we take note of their new pks, so that when foreign key references to these models are encountered later on, we can perform a simple replacement. This generally works well enough, but because we have a circular dependency between `Actor` and `Team`, we must take care to do the appropriate set of dance moves to avoid writing `Actor`s with (necessarily) non-existent `Team` references. To test these changes, I've modified all of `test_models` to *not* reset sequences between database uploads. This should ensure that every such test will produce two JSON files with differing pks, which should give us fairly thorough coverage. Issue: getsentry/team-ospo#170 Issue: getsentry/team-ospo#171
1 parent 7518d18 commit 11011d4

File tree

8 files changed

+230
-38
lines changed

8 files changed

+230
-38
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
[
2+
{
3+
"model": "sentry.email",
4+
"pk": 34,
5+
"fields": {
6+
"email": "[email protected]",
7+
"date_added": "2023-06-22T00:00:00.123Z"
8+
}
9+
},
10+
{
11+
"model": "sentry.user",
12+
"pk": 12,
13+
"fields": {
14+
"password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=",
15+
"last_login": null,
16+
"username": "[email protected]",
17+
"name": "",
18+
"email": "[email protected]",
19+
"is_staff": true,
20+
"is_active": true,
21+
"is_superuser": true,
22+
"is_managed": false,
23+
"is_sentry_app": null,
24+
"is_password_expired": false,
25+
"last_password_change": "2023-06-22T22:59:57.023Z",
26+
"flags": "0",
27+
"session_nonce": null,
28+
"date_joined": "2023-06-22T22:59:55.488Z",
29+
"last_active": "2023-06-22T22:59:55.489Z",
30+
"avatar_type": 0,
31+
"avatar_url": null
32+
}
33+
},
34+
{
35+
"model": "sentry.useremail",
36+
"pk": 56,
37+
"fields": {
38+
"user": 12,
39+
"email": "[email protected]",
40+
"validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE",
41+
"date_hash_added": "2023-06-22T00:00:00.456Z",
42+
"is_verified": false
43+
}
44+
},
45+
{
46+
"model": "sentry.userrole",
47+
"pk": 78,
48+
"fields": {
49+
"date_updated": "2023-06-22T23:00:00.123Z",
50+
"date_added": "2023-06-22T22:54:27.960Z",
51+
"name": "Super Admin",
52+
"permissions": "['broadcasts.admin', 'users.admin', 'options.admin']"
53+
}
54+
},
55+
{
56+
"model": "sentry.userroleuser",
57+
"pk": 90,
58+
"fields": {
59+
"date_updated": "2023-06-22T23:00:00.123Z",
60+
"date_added": "2023-06-22T22:59:57.000Z",
61+
"user": 12,
62+
"role": 78
63+
}
64+
}
65+
]

src/sentry/backup/comparators.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from abc import ABC, abstractmethod
44
from collections import defaultdict
5-
from typing import Callable, Dict, List
5+
from typing import Callable, Dict, List, Type
66

77
from dateutil import parser
88
from django.db import models
@@ -11,6 +11,8 @@
1111
from sentry.backup.findings import ComparatorFinding, ComparatorFindingKind, InstanceID
1212
from sentry.backup.helpers import Side, get_exportable_final_derivations_of
1313
from sentry.db.models import BaseModel
14+
from sentry.models.team import Team
15+
from sentry.models.user import User
1416
from sentry.utils.json import JSONData
1517

1618

@@ -209,7 +211,7 @@ class ForeignKeyComparator(JSONScrubbingComparator):
209211
left_pk_map: PrimaryKeyMap | None = None
210212
right_pk_map: PrimaryKeyMap | None = None
211213

212-
def __init__(self, foreign_fields: dict[str, models.base.ModelBase]):
214+
def __init__(self, foreign_fields: dict[str, Type[models.base.Model]]):
213215
super().__init__(*(foreign_fields.keys()))
214216
self.foreign_fields = foreign_fields
215217

@@ -223,7 +225,8 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
223225
findings = []
224226
fields = sorted(self.fields)
225227
for f in fields:
226-
field_model_name = "sentry." + self.foreign_fields[f].__name__.lower()
228+
obj_name = self.foreign_fields[f]._meta.object_name.lower() # type: ignore[union-attr]
229+
field_model_name = "sentry." + obj_name
227230
if left["fields"].get(f) is None and right["fields"].get(f) is None:
228231
continue
229232

@@ -429,6 +432,8 @@ def build_default_comparators():
429432
comparators: ComparatorMap = defaultdict(
430433
list,
431434
{
435+
# TODO(hybrid-cloud): actor refactor. Remove this entry when done.
436+
"sentry.actor": [ForeignKeyComparator({"team": Team, "user_id": User})],
432437
"sentry.apitoken": [HashObfuscatingComparator("refresh_token", "token")],
433438
"sentry.apiapplication": [HashObfuscatingComparator("client_id", "client_secret")],
434439
"sentry.authidentity": [HashObfuscatingComparator("ident", "token")],

src/sentry/backup/dependencies.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from collections import defaultdict
44
from enum import Enum, auto, unique
5-
from typing import NamedTuple
5+
from typing import NamedTuple, Type
66

77
from django.db import models
88
from django.db.models.fields.related import ForeignKey, OneToOneField
@@ -35,24 +35,24 @@ class ForeignFieldKind(Enum):
3535
class ForeignField(NamedTuple):
3636
"""A field that creates a dependency on another Sentry model."""
3737

38-
model: models.base.ModelBase
38+
model: Type[models.base.Model]
3939
kind: ForeignFieldKind
4040

4141

4242
class ModelRelations(NamedTuple):
4343
"""What other models does this model depend on, and how?"""
4444

45-
model: models.base.ModelBase
45+
model: Type[models.base.Model]
4646
foreign_keys: dict[str, ForeignField]
4747
silos: list[SiloMode]
4848

49-
def flatten(self) -> set[models.base.ModelBase]:
49+
def flatten(self) -> set[Type[models.base.Model]]:
5050
"""Returns a flat set of all related models, omitting the kind of relation they have."""
5151

5252
return {ff.model for ff in self.foreign_keys.values()}
5353

5454

55-
def normalize_model_name(model):
55+
def normalize_model_name(model: Type[models.base.Model]):
5656
return f"{model._meta.app_label}.{model._meta.object_name}"
5757

5858

@@ -61,8 +61,11 @@ class DependenciesJSONEncoder(json.JSONEncoder):
6161
`ModelRelations`."""
6262

6363
def default(self, obj):
64-
if isinstance(obj, models.base.ModelBase):
65-
return normalize_model_name(obj)
64+
if isinstance(obj, models.base.Model):
65+
return normalize_model_name(type(obj))
66+
if meta := getattr(obj, "_meta", None):
67+
# Note: done to accommodate `node.Nodestore`.
68+
return f"{meta.app_label}.{meta.object_name}"
6669
if isinstance(obj, ForeignFieldKind):
6770
return obj.name
6871
if isinstance(obj, SiloMode):
@@ -226,7 +229,7 @@ def sorted_dependencies():
226229
"Can't resolve dependencies for %s in serialized app list."
227230
% ", ".join(
228231
normalize_model_name(m.model)
229-
for m in sorted(skipped, key=lambda obj: normalize_model_name(obj))
232+
for m in sorted(skipped, key=lambda mr: normalize_model_name(mr.model))
230233
)
231234
)
232235
model_dependencies_list = skipped

src/sentry/backup/imports.py

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
from django.apps import apps
88
from django.core import management, serializers
99
from django.db import IntegrityError, connection, transaction
10+
from django.forms import model_to_dict
1011

12+
from sentry.backup.dependencies import PrimaryKeyMap, dependencies, normalize_model_name
1113
from sentry.backup.helpers import EXCLUDED_APPS
1214

1315

@@ -27,16 +29,79 @@ class OldImportConfig(NamedTuple):
2729

2830

2931
def imports(src, old_config: OldImportConfig, printer=click.echo):
30-
"""CLI command wrapping the `exec_import` functionality."""
32+
"""Imports core data for the Sentry installation."""
33+
34+
# TODO(hybrid-cloud): actor refactor. Remove this import when done.
35+
from sentry.models.actor import Actor
3136

3237
try:
3338
# Import / export only works in monolith mode with a consolidated db.
3439
with transaction.atomic("default"):
40+
pk_map = PrimaryKeyMap()
41+
deps = dependencies()
42+
3543
for obj in serializers.deserialize(
3644
"json", src, stream=True, use_natural_keys=old_config.use_natural_foreign_keys
3745
):
3846
if obj.object._meta.app_label not in EXCLUDED_APPS:
39-
obj.save()
47+
# TODO(getsentry/team-ospo#183): This conditional should be removed once we want
48+
# to roll out the new API to self-hosted.
49+
if old_config.use_update_instead_of_create:
50+
obj.save()
51+
else:
52+
o = obj.object
53+
label = o._meta.label_lower
54+
model_name = normalize_model_name(o)
55+
for field, model_relation in deps[model_name].foreign_keys.items():
56+
field_id = f"{field}_id"
57+
fk = getattr(o, field_id, None)
58+
if fk is not None:
59+
new_pk = pk_map.get(normalize_model_name(model_relation.model), fk)
60+
# TODO(getsentry/team-ospo#167): Will allow missing items when we
61+
# implement org-based filtering.
62+
setattr(o, field_id, new_pk)
63+
64+
old_pk = o.pk
65+
o.pk = None
66+
o.id = None
67+
68+
# TODO(hybrid-cloud): actor refactor. Remove this conditional when done.
69+
#
70+
# `Actor` and `Team` have a direct circular dependency between them for the
71+
# time being due to an ongoing refactor (that is, `Actor` foreign keys
72+
# directly into `Team`, and `Team` foreign keys directly into `Actor`). If
73+
# we use `INSERT` database calls naively, they will always fail, because one
74+
# half of the cycle will always be missing.
75+
#
76+
# Because `Actor` ends up first in the dependency sorting (see:
77+
# fixtures/backup/model_dependencies/sorted.json), a viable solution here is
78+
# to always null out the `team_id` field of the `Actor` when we write it,
79+
# and then make sure to circle back and update the relevant actor after we
80+
# create the `Team` models later on (see snippet at the end of this scope).
81+
if label == "sentry.actor":
82+
o.team_id = None
83+
84+
# TODO(getsentry/team-ospo#181): what's up with email/useremail here? Seems
85+
# like both gets added with `sentry.user` simultaneously? Will need to make
86+
# more robust user handling logic, and to test what happens when a UserEmail
87+
# already exists.
88+
if label == "sentry.useremail":
89+
(o, _) = o.__class__.objects.get_or_create(
90+
user=o.user, email=o.email, defaults=model_to_dict(o)
91+
)
92+
pk_map.insert(model_name, old_pk, o.pk)
93+
continue
94+
95+
obj.save(force_insert=True)
96+
pk_map.insert(model_name, old_pk, o.pk)
97+
98+
# TODO(hybrid-cloud): actor refactor. Remove this conditional when done.
99+
if label == "sentry.team":
100+
if o.actor_id is not None:
101+
actor = Actor.objects.get(pk=o.actor_id)
102+
actor.team_id = o.pk
103+
actor.save()
104+
40105
# For all database integrity errors, let's warn users to follow our
41106
# recommended backup/restore workflow before reraising exception. Most of
42107
# these errors come from restoring on a different version of Sentry or not restoring

src/sentry/backup/validate.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,24 @@ def json_lines(obj: JSONData) -> list[str]:
129129
left_pk_map = PrimaryKeyMap()
130130
right_pk_map = PrimaryKeyMap()
131131

132-
# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
133-
# outputs.
132+
# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
133+
# model that we encounter later.
134134
for id, right in right_models.items():
135135
if id.ordinal is None:
136136
raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
137137

138-
# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
139-
# model that we encounter later.
140138
left = left_models[id]
141-
left_pk_map.insert(id.model, left["pk"], id.ordinal)
139+
left_pk_map.insert(id.model, left_models[id]["pk"], id.ordinal)
142140
right_pk_map.insert(id.model, right["pk"], id.ordinal)
143141

142+
# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
143+
# outputs.
144+
for id, right in right_models.items():
145+
if id.ordinal is None:
146+
raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
147+
144148
# Try comparators applicable for this specific model.
149+
left = left_models[id]
145150
if id.model in comparators:
146151
# We take care to run ALL of the `compare()` methods on each comparator before calling
147152
# any `scrub()` methods. This ensures that, in cases where a single model uses multiple

src/sentry/testutils/helpers/backups.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,27 @@
33
import tempfile
44
from pathlib import Path
55

6+
from django.apps import apps
67
from django.core.management import call_command
8+
from django.db import connections, router, transaction
79

810
from sentry.backup.comparators import ComparatorMap
11+
from sentry.backup.dependencies import sorted_dependencies
912
from sentry.backup.exports import OldExportConfig, exports
1013
from sentry.backup.findings import ComparatorFindings
11-
from sentry.backup.helpers import get_exportable_final_derivations_of, get_final_derivations_of
1214
from sentry.backup.imports import OldImportConfig, imports
1315
from sentry.backup.validate import validate
16+
from sentry.models.integrations.sentry_app import SentryApp
1417
from sentry.silo import unguarded_write
1518
from sentry.testutils.factories import get_fixture_path
1619
from sentry.utils import json
1720
from sentry.utils.json import JSONData
1821

1922
__all__ = [
20-
"ValidationError",
2123
"export_to_file",
22-
"get_final_derivations_of",
23-
"get_exportable_final_derivations_of",
2424
"import_export_then_validate",
2525
"import_export_from_fixture_then_validate",
26+
"ValidationError",
2627
]
2728

2829
NOOP_PRINTER = lambda *args, **kwargs: None
@@ -46,7 +47,30 @@ def export_to_file(path: Path) -> JSONData:
4647
return output
4748

4849

49-
def import_export_then_validate(method_name: str) -> JSONData:
50+
REVERSED_DEPENDENCIES = sorted_dependencies()
51+
REVERSED_DEPENDENCIES.reverse()
52+
53+
54+
def clear_database_but_keep_sequences():
55+
"""Deletes all models we care about from the database, in a sequence that ensures we get no
56+
foreign key errors."""
57+
58+
with unguarded_write(using="default"), transaction.atomic(using="default"):
59+
for model in REVERSED_DEPENDENCIES:
60+
# For some reason, the tables for `SentryApp*` models don't get deleted properly here
61+
# when using `model.objects.all().delete()`, so we have to call out to Postgres
62+
# manually.
63+
connection = connections[router.db_for_write(SentryApp)]
64+
with connection.cursor() as cursor:
65+
table = model._meta.db_table
66+
cursor.execute(f"DELETE FROM {table:s};")
67+
68+
# Clear remaining tables that are not explicitly in Sentry's own model dependency graph.
69+
for model in set(apps.get_models()) - set(REVERSED_DEPENDENCIES):
70+
model.objects.all().delete()
71+
72+
73+
def import_export_then_validate(method_name: str, *, reset_pks: bool = True) -> JSONData:
5074
"""Test helper that validates that data imported from an export of the current state of the test
5175
database correctly matches the actual outputted export data."""
5276

@@ -60,10 +84,14 @@ def import_export_then_validate(method_name: str) -> JSONData:
6084

6185
# Write the contents of the "expected" JSON file into the now clean database.
6286
# TODO(Hybrid-Cloud): Review whether this is the correct route to apply in this case.
63-
with unguarded_write(using="default"), open(tmp_expect) as tmp_file:
64-
# Reset the Django database.
65-
call_command("flush", verbosity=0, interactive=False)
66-
imports(tmp_file, OldImportConfig(), NOOP_PRINTER)
87+
with unguarded_write(using="default"):
88+
if reset_pks:
89+
call_command("flush", verbosity=0, interactive=False)
90+
else:
91+
clear_database_but_keep_sequences()
92+
93+
with open(tmp_expect) as tmp_file:
94+
imports(tmp_file, OldImportConfig(), NOOP_PRINTER)
6795

6896
# Validate that the "expected" and "actual" JSON matches.
6997
actual = export_to_file(tmp_actual)

0 commit comments

Comments
 (0)