14
14
DeltaIncludePattern ,
15
15
DeltaTitlePattern ,
16
16
)
17
- from sde_collections .models .delta_url import CuratedUrl , DeltaUrl
17
+ from sde_collections .models .delta_url import CuratedUrl , DeltaUrl , DumpUrl
18
18
from sde_collections .models .pattern import (
19
19
DivisionPattern ,
20
20
DocumentTypePattern ,
@@ -50,6 +50,7 @@ def handle(self, *args, **kwargs):
50
50
51
51
# Step 1: Clear all Delta instances
52
52
start_time = time .time ()
53
+ DumpUrl .objects .all ().delete ()
53
54
CuratedUrl .objects .all ().delete ()
54
55
DeltaUrl .objects .all ().delete ()
55
56
DeltaExcludePattern .objects .all ().delete ()
@@ -59,22 +60,26 @@ def handle(self, *args, **kwargs):
59
60
DeltaDivisionPattern .objects .all ().delete ()
60
61
self .stdout .write (f"Cleared all Delta instances in { time .time () - start_time :.2f} seconds." )
61
62
62
- # Step 2: Get collections with Candidate URLs
63
+ # Step 2: Get collections ordered by URL count
63
64
start_time = time .time ()
64
- all_collections_with_urls = Collection .objects .annotate (url_count = Count ("candidate_urls" )).filter (
65
- url_count__gt = 0
66
- )
67
- self .stdout .write (f"Collected collections with URLs in { time .time () - start_time :.2f} seconds." )
65
+ total_collections = Collection .objects .count ()
66
+ collections = Collection .objects .annotate (url_count = Count ("candidate_urls" )).order_by ("url_count" )
67
+ self .stdout .write (f"Retrieved and ordered collections in { time .time () - start_time :.2f} seconds." )
68
68
69
- # Step 3: Migrate all CandidateURLs to DeltaUrl
70
- start_time = time .time ()
71
69
# Set to track URLs globally across all collections
72
70
global_unique_urls = set ()
73
71
74
- for collection in all_collections_with_urls :
72
+ # Process each collection individually
73
+ for index , collection in enumerate (collections ):
74
+ collection_start_time = time .time ()
75
+ self .stdout .write (
76
+ f"\n Processing collection: { collection } with { collection .url_count } URLs ({ index + 1 } /{ total_collections } )" # noqa
77
+ )
78
+
79
+ # Step 3: Migrate CandidateURLs to DeltaUrl for this collection
80
+ urls_start_time = time .time ()
75
81
delta_urls = []
76
82
77
- # Filter CandidateURL objects, ensuring each URL is globally unique
78
83
for candidate_url in CandidateURL .objects .filter (collection = collection ):
79
84
if candidate_url .url not in global_unique_urls :
80
85
global_unique_urls .add (candidate_url .url )
@@ -93,69 +98,50 @@ def handle(self, *args, **kwargs):
93
98
94
99
# Bulk create the unique DeltaUrl instances for this collection
95
100
DeltaUrl .objects .bulk_create (delta_urls )
101
+ self .stdout .write (
102
+ f"Migrated { len (delta_urls )} URLs to DeltaUrl in { time .time () - urls_start_time :.2f} seconds"
103
+ )
96
104
97
- self .stdout .write (f"Migrated CandidateURLs to DeltaUrl in { time .time () - start_time :.2f} seconds." )
98
-
99
- # Step 4: Migrate Patterns
100
- start_time = time .time ()
101
-
102
- pattern_start_time = time .time ()
103
- self .migrate_patterns (ExcludePattern )
104
- self .stdout .write (f"ExcludePattern migration completed in { time .time () - pattern_start_time :.2f} seconds." )
105
+ # Step 4: Migrate Patterns for this collection
106
+ patterns_start_time = time .time ()
105
107
106
- pattern_start_time = time .time ()
107
- self .migrate_patterns (IncludePattern )
108
- self .stdout .write (f"IncludePattern migration completed in { time .time () - pattern_start_time :.2f} seconds." )
108
+ for pattern_model in [ExcludePattern , IncludePattern , TitlePattern , DocumentTypePattern , DivisionPattern ]:
109
+ self .migrate_patterns_for_collection (pattern_model , collection )
109
110
110
- pattern_start_time = time .time ()
111
- self .migrate_patterns (TitlePattern )
112
- self .stdout .write (f"TitlePattern migration completed in { time .time () - pattern_start_time :.2f} seconds." )
111
+ self .stdout .write (f"Pattern migration completed in { time .time () - patterns_start_time :.2f} seconds" )
113
112
114
- pattern_start_time = time .time ()
115
- self .migrate_patterns (DocumentTypePattern )
116
- self .stdout .write (f"DocumentTypePattern migration completed in { time .time () - pattern_start_time :.2f} seconds." )
113
+ # Step 5: Promote to CuratedUrl if applicable
114
+ if collection .workflow_status in STATUSES_TO_MIGRATE :
115
+ promote_start_time = time .time ()
116
+ collection .promote_to_curated ()
117
+ self .stdout .write (f"Promoted to CuratedUrl in { time .time () - promote_start_time :.2f} seconds" )
117
118
118
- pattern_start_time = time .time ()
119
- self .migrate_patterns (DivisionPattern )
120
- self .stdout .write (f"DivisionPattern migration completed in { time .time () - pattern_start_time :.2f} seconds." )
121
-
122
- self .stdout .write (f"Total patterns migration completed in { time .time () - start_time :.2f} seconds." )
123
-
124
- # Step 5: Promote DeltaUrls to CuratedUrl
125
- start_time = time .time ()
126
- all_curated_collections_with_urls = all_collections_with_urls .filter (workflow_status__in = STATUSES_TO_MIGRATE )
127
- self .stdout .write (
128
- f"""Migrating URLs for { all_curated_collections_with_urls .count ()} collections
129
- with CURATED or higher status..."""
130
- )
131
- for collection in all_curated_collections_with_urls :
132
- collection .promote_to_curated ()
133
- self .stdout .write (f"Promotion to CuratedUrl completed in { time .time () - start_time :.2f} seconds." )
119
+ self .stdout .write (
120
+ f"Total processing time for collection: { time .time () - collection_start_time :.2f} seconds\n "
121
+ f"--------------------"
122
+ )
134
123
135
124
# Log the total time for the process
136
125
self .stdout .write (f"Total migration process completed in { time .time () - overall_start_time :.2f} seconds." )
137
126
138
- def migrate_patterns (self , non_delta_model ):
139
- """Migrate patterns from a non-delta model to the corresponding delta model."""
127
+ def migrate_patterns_for_collection (self , non_delta_model , collection ):
128
+ """Migrate patterns from a non-delta model to the corresponding delta model for a specific collection ."""
140
129
# Determine the delta model name and fetch the model class
141
130
delta_model_name = "Delta" + non_delta_model .__name__
142
131
delta_model = apps .get_model (non_delta_model ._meta .app_label , delta_model_name )
143
132
144
- self .stdout .write (f"Migrating patterns from { non_delta_model .__name__ } to { delta_model_name } ..." )
145
-
146
133
# Get all field names from both models except 'id' (primary key)
147
134
non_delta_fields = {field .name for field in non_delta_model ._meta .fields if field .name != "id" }
148
135
delta_fields = {field .name for field in delta_model ._meta .fields if field .name != "id" }
149
136
150
137
# Find shared fields
151
138
shared_fields = non_delta_fields .intersection (delta_fields )
152
139
153
- for pattern in non_delta_model .objects .all ():
140
+ # Only process patterns for the current collection
141
+ for pattern in non_delta_model .objects .filter (collection = collection ):
154
142
# Build the dictionary of shared fields to copy
155
143
delta_fields_data = {field : getattr (pattern , field ) for field in shared_fields }
156
144
157
145
# Create an instance of the delta model and save it to call the custom save() method
158
146
delta_instance = delta_model (** delta_fields_data )
159
147
delta_instance .save () # Explicitly call save() to trigger custom logic
160
-
161
- self .stdout .write (f"Migration completed for { non_delta_model .__name__ } to { delta_model_name } ." )
0 commit comments