Skip to content

Commit a1bd63e

Browse files
committed
add code to remove duplicate patterns
1 parent 8056b22 commit a1bd63e

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# docker-compose -f local.yml run --rm django python manage.py deduplicate_patterns
2+
# docker-compose -f production.yml run --rm django python manage.py deduplicate_patterns
3+
4+
from collections import defaultdict
5+
6+
from django.core.management.base import BaseCommand
7+
from django.db.models import Count
8+
9+
from sde_collections.models.pattern import (
10+
DivisionPattern,
11+
DocumentTypePattern,
12+
ExcludePattern,
13+
IncludePattern,
14+
TitlePattern,
15+
)
16+
17+
18+
class Command(BaseCommand):
    """Delete duplicate pattern rows, keeping one row per duplicate group.

    Two rows of the same pattern model are treated as duplicates when they
    share both ``collection`` and ``match_pattern``. For every such group the
    row with the lowest ``id`` is kept and the remaining rows are deleted.
    """

    help = "Remove duplicate patterns within collections for all pattern types"

    def handle(self, *args, **kwargs):
        """Remove duplicates for every pattern model and print a summary.

        Writes one ``<ModelName>: <count>`` line to stdout for each model that
        had rows deleted, followed by a ``Total: <count>`` line.
        """
        pattern_models = [ExcludePattern, IncludePattern, TitlePattern, DocumentTypePattern, DivisionPattern]

        deletion_counts = defaultdict(int)

        for model in pattern_models:
            # Find every (collection, match_pattern) pair that occurs more than once.
            # The trailing empty order_by() clears any ordering so that only the
            # two values() fields take part in the GROUP BY; an ordering field
            # would otherwise be added to the grouping and could split groups.
            duplicate_groups = (
                model.objects.values("collection", "match_pattern")
                .annotate(pattern_count=Count("id"))
                .filter(pattern_count__gt=1)
                .order_by()
            )

            for group in duplicate_groups:
                # All rows for this collection/match_pattern combo, ordered
                # explicitly: slicing an unordered queryset gives no guarantee
                # about which row is skipped, so the survivor would be arbitrary.
                patterns = model.objects.filter(
                    collection_id=group["collection"],
                    match_pattern=group["match_pattern"],
                ).order_by("id")

                # Keep the lowest-id row and delete the rest. Rows are deleted
                # one at a time through the instance's delete() so that any
                # model-level delete() override still runs.
                for pattern in patterns[1:]:
                    pattern.delete()
                    deletion_counts[model.__name__] += 1

        # Print final summary
        for model_name, count in deletion_counts.items():
            self.stdout.write(f"{model_name}: {count}")
        self.stdout.write(f"Total: {sum(deletion_counts.values())}")

0 commit comments

Comments
 (0)