Skip to content

Commit 28fd96d

Browse files
committed
add usecase sdg migration
1 parent a838731 commit 28fd96d

File tree

1 file changed

+290
-0
lines changed

1 file changed

+290
-0
lines changed

migrate_sdg_metadata.py

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
#!/usr/bin/env python
2+
"""
3+
Migration script to move SDG data from metadata to the new sdgs field.
4+
5+
This script:
6+
1. Finds all usecases with SDG metadata
7+
2. Maps SDG codes/names to SDG model instances
8+
3. Adds the SDG relationships to the new ManyToMany field
9+
4. Optionally removes the old metadata entries
10+
5. Optionally updates the search index
11+
12+
Usage:
13+
python migrate_sdg_metadata.py [--dry-run] [--remove-metadata] [--update-index] [--usecase-id ID] [--list-sdgs]
14+
15+
Options:
16+
--dry-run: Show what would be migrated without making changes
17+
--remove-metadata: Remove SDG metadata after successful migration
18+
--update-index: Update search index after successful migration
19+
--usecase-id ID: Migrate only the usecase with the specified ID
20+
--list-sdgs: List all available SDGs in the database
21+
"""
22+
23+
import argparse
24+
import os
25+
import subprocess
26+
import sys
27+
from typing import List, Set
28+
29+
import django
30+
31+
# Setup Django environment.
# Django has to be configured before anything from the project's ``api``
# package is imported, so every project import lives below ``django.setup()``.
# NOTE(review): ``api.utils.enums`` may well be importable without settings,
# but importing it after setup is safe either way.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "DataSpace.settings")
django.setup()

from api.models import SDG, Metadata, UseCase, UseCaseMetadata  # noqa: E402
from api.utils.enums import MetadataModels  # noqa: E402
38+
39+
40+
def parse_sdg_value(value: str) -> List[str]:
    """Break a raw SDG metadata value into its individual identifiers.

    The value is split on commas; surrounding whitespace is trimmed from each
    piece and pieces that end up empty are dropped.

    Args:
        value: Raw metadata value, e.g. ``"SDG1, SDG 2"``. A falsy value
            (empty string or ``None``) yields an empty list.

    Returns:
        The cleaned identifiers in their original order.
    """
    if not value:
        return []
    # The walrus keeps the stripped token so each piece is stripped only once.
    return [token for piece in value.split(",") if (token := piece.strip())]
46+
47+
48+
def find_sdg_by_code_or_name(identifier: str) -> SDG | None:
    """Look up an SDG by its code or, failing that, by its name.

    The identifier is first treated as a code: it is upper-cased, spaces are
    removed and an ``SDG`` prefix is added when missing, so ``"1"``,
    ``"sdg 1"`` and ``"SDG1"`` all query for the code ``SDG1``. When no code
    matches, the identifier is compared against SDG names, preferring an
    exact case-insensitive match over a substring match.

    Args:
        identifier: Raw SDG code or name taken from a metadata value.

    Returns:
        The first matching ``SDG`` instance, or ``None`` when the identifier
        is blank or nothing matches.
    """
    identifier = identifier.strip() if identifier else ""
    if not identifier:
        # A blank string would match every row through ``name__icontains``
        # and hand back an arbitrary SDG, so refuse it up front.
        return None

    normalized = identifier.upper().replace(" ", "")
    if not normalized.startswith("SDG"):
        normalized = f"SDG{normalized}"

    # Ordered from most to least specific. ``filter(...).first()`` returns
    # None rather than raising DoesNotExist, so no try/except is required.
    lookups = (
        {"code__iexact": normalized},
        {"name__iexact": identifier},
        {"name__icontains": identifier},
    )
    for lookup in lookups:
        sdg = SDG.objects.filter(**lookup).first()
        if sdg is not None:
            return sdg
    return None
65+
66+
67+
def migrate_usecase_sdgs(
    dry_run: bool = False, remove_metadata: bool = False, usecase_id: int | None = None
) -> None:
    """Copy SDG values stored as usecase metadata into the ``sdgs`` relation.

    Every ``UseCaseMetadata`` row attached to the "SDG Goal" metadata item is
    parsed into identifiers, each identifier is resolved to an ``SDG`` and the
    resolved SDGs are added to the usecase's ``sdgs`` field. Progress and a
    final summary are printed to stdout.

    Args:
        dry_run: When true, only report what would be added; nothing is
            written or deleted.
        remove_metadata: When true (and not a dry run), delete the metadata
            row after its SDGs were added. The row is kept if any of its
            identifiers could not be resolved, so unmapped values are not
            lost.
        usecase_id: Restrict the migration to this usecase; ``None`` migrates
            all usecases.
    """
    single_usecase = usecase_id is not None

    print("\n" + "=" * 80)
    if single_usecase:
        print(f"MIGRATING USECASE SDGS FOR USECASE ID: {usecase_id}")
    else:
        print("MIGRATING USECASE SDGS")
    print("=" * 80)

    # Get SDG metadata item
    try:
        sdg_metadata_item = Metadata.objects.get(
            label="SDG Goal", model=MetadataModels.USECASE
        )
    except Metadata.DoesNotExist:
        print("⚠️ No SDG metadata item found. Skipping usecase migration.")
        return

    # Find all usecases with SDG metadata, optionally narrowed to one usecase
    usecase_metadata = UseCaseMetadata.objects.filter(
        metadata_item=sdg_metadata_item
    ).select_related("usecase", "metadata_item")
    if single_usecase:
        usecase_metadata = usecase_metadata.filter(usecase_id=str(usecase_id))

    total_usecases = usecase_metadata.count()
    if single_usecase:
        print(
            f"\nFound {total_usecases} SDG metadata entries for usecase ID {usecase_id}"
        )
    else:
        print(f"\nFound {total_usecases} usecases with SDG metadata")

    migrated_count = 0
    error_count = 0
    not_found_sdgs: Set[str] = set()

    for uc_meta in usecase_metadata:
        usecase = uc_meta.usecase  # type: ignore
        sdg_identifiers = parse_sdg_value(uc_meta.value)  # type: ignore

        if not sdg_identifiers:
            # Blank value: nothing to migrate, but count it so the summary
            # figures add up to the total.
            error_count += 1
            continue

        print(f"\n📋 UseCase: {usecase.title} (ID: {usecase.id})")
        print(f" SDG metadata: {uc_meta.value}")  # type: ignore

        sdgs_to_add = []
        unresolved = []
        for sdg_id in sdg_identifiers:
            sdg = find_sdg_by_code_or_name(sdg_id)
            if sdg:
                sdgs_to_add.append(sdg)
                print(f" ✓ Found: {sdg.code} - {sdg.name}")  # type: ignore
            else:
                unresolved.append(sdg_id)
                not_found_sdgs.add(sdg_id)
                print(f" ✗ Not found: {sdg_id}")

        if not sdgs_to_add:
            error_count += 1
            print(" ⚠️ No valid SDGs found for this usecase")
            continue

        migrated_count += 1
        if dry_run:
            print(f" [DRY RUN] Would add {len(sdgs_to_add)} SDGs")
            continue

        # Add SDGs to the usecase
        usecase.sdgs.add(*sdgs_to_add)
        print(f" ✅ Added {len(sdgs_to_add)} SDGs to usecase")

        # Optionally remove metadata, but only when nothing would be lost
        if remove_metadata:
            if unresolved:
                print(
                    f" ⚠️ Kept SDG metadata: {len(unresolved)} value(s) "
                    "could not be mapped"
                )
            else:
                uc_meta.delete()  # type: ignore
                print(" 🗑️ Removed SDG metadata")

    migrated_label = "Would migrate" if dry_run else "Successfully migrated"

    print(f"\n{'=' * 80}")
    if single_usecase:
        print(f"UseCase Migration Summary for UseCase ID {usecase_id}:")
    else:
        print("UseCase Migration Summary:")
    print(f" Total usecases processed: {total_usecases}")
    print(f" {migrated_label}: {migrated_count}")
    print(f" Errors/Skipped: {error_count}")
    if not_found_sdgs:
        print("\n SDGs not found in database:")
        for sdg_str in sorted(not_found_sdgs):
            print(f" - {sdg_str}")
    print(f"{'=' * 80}")
160+
161+
162+
def update_search_index(index_type: str | None = None) -> None:
    """Rebuild the Elasticsearch search index through ``manage.py``.

    Runs ``manage.py search_index --rebuild -f`` with the current interpreter
    in the current working directory and reports success or failure on
    stdout. Failures are printed, not raised.

    Args:
        index_type: Label shown in the progress message only; the rebuild
            command itself is invoked without any index restriction.
    """
    print(f"\n🔄 Updating search index{' for ' + index_type if index_type else ''}...")

    # Output is captured, so an interactive confirmation prompt would be
    # invisible and the script would appear to hang. ``-f`` skips the prompt
    # (assuming django-elasticsearch-dsl's ``search_index`` command), and
    # closing stdin makes any remaining prompt fail fast instead of blocking.
    command = [sys.executable, "manage.py", "search_index", "--rebuild", "-f"]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            cwd=os.getcwd(),
            stdin=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"❌ Error updating search index: {str(e)}")
        return

    if result.returncode == 0:
        print("✅ Search index updated successfully!")
        if result.stdout.strip():
            print(f"Output: {result.stdout.strip()}")
    else:
        print("❌ Failed to update search index!")
        if result.stderr.strip():
            print(f"Error: {result.stderr.strip()}")
186+
187+
188+
def list_available_sdgs() -> None:
    """Print every SDG in the database, ordered by code.

    Each SDG is shown as ``code: name`` followed, when it has a description,
    by at most the first 100 characters of it. An ellipsis is appended only
    when the description was actually cut.
    """
    print("\n" + "=" * 80)
    print("AVAILABLE SDGs IN DATABASE")
    print("=" * 80)

    # Materialise once: the emptiness check, the total and the loop below
    # then share a single query.
    sdgs = list(SDG.objects.all().order_by("code"))

    if not sdgs:
        print("\n⚠️ No SDGs found in database!")
        print(" Please populate SDGs first.")
        return

    print(f"\nTotal SDGs: {len(sdgs)}\n")
    for sdg in sdgs:
        print(f" {sdg.code}: {sdg.name}")  # type: ignore
        description = sdg.description  # type: ignore
        if description:
            truncated = len(description) > 100
            print(f" {description[:100]}{'...' if truncated else ''}")

    print("\n" + "=" * 80)
208+
209+
210+
def main() -> None:
    """Parse the command line and run the requested action.

    ``--list-sdgs`` prints the available SDGs and exits. Otherwise the
    usecase SDG migration runs, honouring ``--dry-run``, ``--remove-metadata``
    and ``--usecase-id``. Deleting metadata requires typing ``yes`` at a
    confirmation prompt; anything else, including a closed stdin, cancels.
    After a real run the search index is rebuilt when ``--update-index`` was
    given and a list of follow-up steps is printed.
    """
    parser = argparse.ArgumentParser(
        description="Migrate SDG metadata to sdgs field for usecases"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be migrated without making changes",
    )
    parser.add_argument(
        "--remove-metadata",
        action="store_true",
        help="Remove SDG metadata after successful migration",
    )
    parser.add_argument(
        "--list-sdgs",
        action="store_true",
        help="List all available SDGs in the database",
    )
    parser.add_argument(
        "--update-index",
        action="store_true",
        help="Update search index after successful migration",
    )
    parser.add_argument(
        "--usecase-id",
        type=int,
        help="Migrate only the usecase with the specified ID",
    )

    args = parser.parse_args()

    if args.list_sdgs:
        list_available_sdgs()
        return

    print("\n" + "=" * 80)
    print("SDG METADATA MIGRATION SCRIPT")
    print("=" * 80)

    if args.dry_run:
        print("\n🔍 DRY RUN MODE - No changes will be made")

    if args.remove_metadata and not args.dry_run:
        print("\n⚠️ WARNING: SDG metadata will be DELETED after migration")
        try:
            response = input("Are you sure you want to continue? (yes/no): ")
        except EOFError:
            # No interactive stdin (cron, CI, pipe): never delete unconfirmed.
            response = ""
        if response.strip().lower() != "yes":
            print("Migration cancelled.")
            return

    # Run migration
    migrate_usecase_sdgs(
        dry_run=args.dry_run,
        remove_metadata=args.remove_metadata,
        usecase_id=args.usecase_id,
    )

    print("\n" + "=" * 80)
    print("MIGRATION COMPLETE")
    print("=" * 80)

    if args.dry_run:
        print("\n💡 This was a dry run. Run without --dry-run to apply changes.")
        return

    print("\n✅ Migration completed successfully!")

    # Update search index if requested
    if args.update_index:
        update_search_index("usecases")

    # Build the list first so the numbering stays consecutive when the
    # re-index step is omitted.
    next_steps = ["Verify the migrated data in the admin panel"]
    if not args.update_index:
        next_steps.append(
            "Re-index Elasticsearch: python manage.py search_index --rebuild"
        )
    next_steps.append("Test the SDG filters in the frontend")

    print("\n📝 Next steps:")
    for number, step in enumerate(next_steps, start=1):
        print(f" {number}. {step}")
287+
288+
289+
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)