|
| 1 | +""" |
| 2 | +the ej_dump is generated by running create_ej_dump.py and is scp'd to the COSMOS server |
| 3 | +this script is then run via the dm shell on the COSMOS server to populate the database |
| 4 | +""" |
| 5 | + |
| 6 | +import json |
| 7 | + |
| 8 | +from environmental_justice.models import EnvironmentalJusticeRow |
| 9 | + |
| 10 | +import urllib.parse |
| 11 | + |
| 12 | + |
def generate_source_link(doi_field):
    """Build a source link from a DOI mapping.

    Reads the "Authority" and "DOI" entries of ``doi_field`` and combines
    them with ``urllib.parse.urljoin``. Returns an empty string when either
    entry is missing or falsy.
    """
    base = doi_field.get("Authority")
    identifier = doi_field.get("DOI")
    # Guard clause: without both parts there is nothing meaningful to link to.
    if not (base and identifier):
        return ""
    return urllib.parse.urljoin(base, identifier)
| 19 | + |
| 20 | + |
def concept_id_to_sinequa_id(concept_id: str) -> str:
    """Return ``concept_id`` prefixed with the "/SDE/CMR_API/|" marker."""
    prefix = "/SDE/CMR_API/|"
    return prefix + f"{concept_id}"
| 23 | + |
| 24 | + |
def sinequa_id_to_url(sinequa_id: str) -> str:
    """Return the preview URL for ``sinequa_id``.

    The id and a fixed JSON query string are both percent-encoded with no
    safe characters (so "/" and "|" are escaped too) and appended to the
    preview base URL as the ``id`` and ``query`` parameters, in that order.
    """
    params = {
        "id": sinequa_id,
        "query": '{"name":"query-smd-primary","scope":"All","text":""}',
    }
    # Dicts preserve insertion order, so "id" always precedes "query".
    encoded_pairs = [
        f"{key}={urllib.parse.quote(value, safe='')}" for key, value in params.items()
    ]
    preview_url = "https://sciencediscoveryengine.nasa.gov/app/nasa-sba-smd/#/preview"
    return preview_url + "?" + "&".join(encoded_pairs)
| 33 | + |
| 34 | + |
def categorize_processing_level(level):
    """Map a processing-level identifier to an intended-use category.

    Returns "exploration" or "basic analysis" for the identifiers listed
    below, and "advanced analysis" for everything else (including "0",
    "Level 0", "NA", "Not Provided", "Not provided" and any unrecognised
    value).
    """
    basic_ids = (
        "1",
        "1A",
        "1B",
        "1C",
        "1T",
        "2",
        "2A",
        "2B",
        "2G",
        "2P",
        "Level 1",
        "Level 1A",
        "Level 1B",
        "Level 1C",
        "Level 2",
        "Level 2A",
        "Level 2B",
    )
    # NOTE(review): "L2" is grouped with levels 3/4 here, not with the other
    # level-2 identifiers -- confirm this is intentional.
    exploration_ids = ("3", "4", "Level 3", "Level 4", "L2")

    # The two groups do not overlap, so merge order does not matter.
    category_by_id = {
        **dict.fromkeys(exploration_ids, "exploration"),
        **dict.fromkeys(basic_ids, "basic analysis"),
    }
    return category_by_id.get(level, "advanced analysis")
| 69 | + |
| 70 | + |
# Read the dump before touching the database, so that a missing or malformed
# file cannot leave the DEV rows deleted with nothing to replace them.
# The context manager closes the file handle; JSON is read as UTF-8.
with open("backups/ej_dump_20240815_112916.json", encoding="utf-8") as dump_file:
    ej_dump = json.load(dump_file)

# remove existing data
EnvironmentalJusticeRow.objects.filter(destination_server=EnvironmentalJusticeRow.DestinationServerChoices.DEV).delete()

for dataset in ej_dump:
    umm = dataset.get("umm", {})
    meta = dataset.get("meta", {})

    # `or [{}]` also covers a present-but-empty TemporalExtents list, which
    # would otherwise raise IndexError on the [0] lookup.
    first_temporal_extent = (umm.get("TemporalExtents") or [{}])[0]

    ej_row = EnvironmentalJusticeRow(
        destination_server=EnvironmentalJusticeRow.DestinationServerChoices.DEV,
        sde_link=sinequa_id_to_url(concept_id_to_sinequa_id(meta.get("concept-id", ""))),
        dataset=umm.get("ShortName", ""),
        description=umm.get("Abstract", ""),
        limitations=umm.get("AccessConstraints", {}).get("Description", ""),
        format=meta.get("format", ""),
        temporal_extent=", ".join(first_temporal_extent.get("SingleDateTimes", [])),
        # The fallback id is not a known level, so categorize_processing_level
        # maps it to its default category, "advanced analysis".
        intended_use=categorize_processing_level(umm.get("ProcessingLevel", {}).get("Id", "advanced analysis")),
        source_link=generate_source_link(umm.get("DOI", {})),
        # Required key: a dataset without "indicators" raises KeyError.
        indicators=dataset["indicators"],
        geographic_coverage="",  # Not provided in the data
        data_visualization="",  # Not populated; a RelatedUrls-based value was left disabled
        latency="",  # Not provided in the data
        spatial_resolution="",  # Not provided in the data
        temporal_resolution="",  # Not provided in the data
        description_simplified="",  # Not provided in the data
        project="",  # Not provided in the data
        strengths="",  # Not provided in the data
    )
    ej_row.save()
0 commit comments