Skip to content

Commit 7ad6d3c

Browse files
Bbn08 authored and fricklerhandwerk committed
fix: deduplicate Description and Reference records on CVE ingestion
1 parent a35e45b commit 7ad6d3c

File tree

2 files changed

+38
-12
lines changed

2 files changed

+38
-12
lines changed

src/shared/fetchers.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,12 @@ def make_media(data: dict[str, str]) -> models.SupportingMedia:
4444

4545

4646
def make_description(data: dict[str, Any]) -> models.Description:
    """Return the Description row for ``data``, creating it only if none exists.

    Rows are matched on ``data["lang"]`` and ``data["value"]``. Supporting
    media (the optional ``data["supportingMedia"]`` list) is attached only
    when a new row is created; a reused row keeps the media it already has.

    Raises:
        KeyError: if ``data`` has no ``"lang"`` or ``"value"`` entry.
    """
    lookup = {"lang": data["lang"], "value": data["value"]}
    try:
        obj, created = models.Description.objects.get_or_create(**lookup)
    except models.Description.MultipleObjectsReturned:
        # get_or_create() raises when more than one row already matches.
        # NOTE(review): presumably such duplicates exist from ingestion runs
        # that used plain create(); confirm whether a cleanup migration or a
        # unique constraint makes this branch unreachable.
        # Reuse the oldest match so repeated runs converge on the same row.
        obj = models.Description.objects.filter(**lookup).order_by("pk")[0]
        created = False

    if created:
        obj.media.set(map(make_media, data.get("supportingMedia", [])))

    return obj
5555

@@ -61,12 +61,12 @@ def make_tag(name: str) -> models.Tag:
6161

6262

6363
def make_reference(data: dict[str, Any]) -> models.Reference:
    """Return the Reference row for ``data``, creating it only if none exists.

    Rows are matched on ``data["url"]`` and ``data["name"]`` (a missing name
    is treated as the empty string). Tags (the optional ``data["tags"]``
    list) are attached only when a new row is created; a reused row keeps
    the tags it already has.

    Raises:
        KeyError: if ``data`` has no ``"url"`` entry.
    """
    lookup = {"url": data["url"], "name": data.get("name", "")}
    try:
        obj, created = models.Reference.objects.get_or_create(**lookup)
    except models.Reference.MultipleObjectsReturned:
        # get_or_create() raises when more than one row already matches.
        # NOTE(review): presumably such duplicates exist from ingestion runs
        # that used plain create(); confirm whether a cleanup migration or a
        # unique constraint makes this branch unreachable.
        # Reuse the oldest match so repeated runs converge on the same row.
        obj = models.Reference.objects.filter(**lookup).order_by("pk")[0]
        created = False

    if created:
        obj.tags.set(map(make_tag, data.get("tags", [])))

    return obj
7272

src/shared/tests/test_fetchers.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import pytest
2+
3+
from shared.fetchers import make_description, make_reference
4+
from shared.models.cve import Description, Reference
5+
6+
7+
@pytest.mark.django_db
def test_make_description_deduplicates() -> None:
    """Ingesting the same lang+value payload twice must reuse a single Description row."""
    payload = {"lang": "en", "value": "A test vulnerability description."}

    original, repeat = make_description(payload), make_description(payload)

    # The queryset is lazy; it only hits the database in the count() below.
    matching_rows = Description.objects.filter(lang="en", value=payload["value"])
    assert original.pk == repeat.pk
    assert matching_rows.count() == 1
16+
17+
18+
@pytest.mark.django_db
def test_make_reference_deduplicates() -> None:
    """Ingesting the same url+name payload twice must reuse a single Reference row."""
    payload = {"url": "https://example.com/advisory", "name": "Advisory"}

    original, repeat = make_reference(payload), make_reference(payload)

    # The queryset is lazy; it only hits the database in the count() below.
    matching_rows = Reference.objects.filter(url=payload["url"], name=payload["name"])
    assert original.pk == repeat.pk
    assert matching_rows.count() == 1

0 commit comments

Comments
 (0)