Skip to content

Commit 9ba79ab

Browse files
authored
fix(admin): update query to fetch most recent create (#19323)
1 parent a31a9c5 commit 9ba79ab

File tree

2 files changed

+48
-15
lines changed

2 files changed

+48
-15
lines changed

tests/unit/admin/views/test_observations.py

Lines changed: 42 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -722,32 +722,52 @@ def test_with_both_quarantine_and_removal(self, db_request):
722722
assert result["removal_time"] is not None
723723

724724
def test_deleted_project_timeline(self, db_request):
725-
"""Test timeline stats for deleted projects using journal entries."""
725+
"""Test timeline stats for deleted projects using journal entries.
726+
727+
This also tests the project recreation scenario:
728+
- Original project created years ago
729+
- Original project removed
730+
- Malicious project created with same name (name squatting)
731+
- We should use the MOST RECENT create date, not the original
732+
"""
726733
admin_user = UserFactory.create(username="admin")
734+
original_owner = UserFactory.create(username="original-owner")
727735

728-
# Create a project, then simulate deletion by setting related=None
729-
# We need the journal entries to exist for lookup
730736
project_name = "deleted-test-project"
731-
project_created = datetime.now(tz=timezone.utc).replace(
732-
tzinfo=None
733-
) - timedelta(hours=48)
737+
now = datetime.now(tz=timezone.utc)
734738

735-
# Create journal entry for project creation (simulates historical record)
739+
# Simulate original project lifecycle (years ago)
740+
original_created = now.replace(tzinfo=None) - timedelta(days=365 * 3)
736741
JournalEntryFactory.create(
737742
name=project_name,
738743
action="create",
739-
submitted_by=admin_user,
740-
submitted_date=project_created,
744+
submitted_by=original_owner,
745+
submitted_date=original_created,
746+
)
747+
# Original project was removed (not relevant to our query, just context)
748+
JournalEntryFactory.create(
749+
name=project_name,
750+
action="remove project",
751+
submitted_by=original_owner,
752+
submitted_date=now.replace(tzinfo=None) - timedelta(days=30),
741753
)
742754

743-
# Create observation for deleted project (related=None)
744-
# The related_name will contain the project name in repr format
745-
now = datetime.now(tz=timezone.utc)
755+
# Malicious recreation - this is the `create` date we should use
756+
malicious_created = now.replace(tzinfo=None) - timedelta(hours=48)
757+
JournalEntryFactory.create(
758+
name=project_name,
759+
action="create",
760+
submitted_by=UserFactory.create(username="malicious-actor"),
761+
submitted_date=malicious_created,
762+
)
763+
764+
# Create observation for deleted project (related=None after removal)
765+
report_time = now.replace(tzinfo=None) - timedelta(hours=24)
746766
ProjectObservationFactory.create(
747767
kind="is_malware",
748768
related=None,
749769
related_name=f"Project(id=None, name='{project_name}')",
750-
created=now.replace(tzinfo=None) - timedelta(hours=24),
770+
created=report_time,
751771
actions={
752772
int(now.timestamp()): {
753773
"action": "remove_malware",
@@ -771,6 +791,15 @@ def test_deleted_project_timeline(self, db_request):
771791
# Should find the deleted project and calculate timeline stats
772792
assert result["sample_size"] == 1
773793
assert result["detection_time"] is not None
794+
795+
# Detection time should be ~24 hours (malicious_created -> report)
796+
# NOT ~3 years (original_created -> report)
797+
# malicious_created is 48h ago, report_time is 24h ago = 24h detection
798+
detection_hours = result["detection_time"]["median"]
799+
assert detection_hours < 100, (
800+
f"Detection time {detection_hours}h suggests we're using the old "
801+
f"'create' date instead of the most recent one"
802+
)
774803
assert result["quarantine_time"] is not None
775804
assert result["longest_lived"][0]["name"] == project_name
776805

warehouse/admin/views/observations.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -400,13 +400,17 @@ def _get_timeline_data(request: Request, observations: list) -> dict:
400400

401401
# Look up creation and quarantine times from journal entries in one query
402402
# Using conditional aggregation to get both in a single round-trip
403+
# NOTE: We use func.max() to get the MOST RECENT create/quarantine times.
404+
# Projects can be removed and recreated (e.g., name squatting after removal),
405+
# so we need the latest "create" to accurately measure detection time for
406+
# the malicious instance, not the original legitimate project.
403407
journal_stmt = (
404408
select(
405409
JournalEntry.name,
406-
func.min(JournalEntry.submitted_date)
410+
func.max(JournalEntry.submitted_date)
407411
.filter(JournalEntry.action == "create")
408412
.label("created_date"),
409-
func.min(JournalEntry.submitted_date)
413+
func.max(JournalEntry.submitted_date)
410414
.filter(
411415
JournalEntry.action == "project quarantined",
412416
JournalEntry._submitted_by == "admin",

0 commit comments

Comments
 (0)