diff --git a/sde_collections/migrations/0076_deltatdammtagpattern.py b/sde_collections/migrations/0076_deltatdammtagpattern.py new file mode 100644 index 00000000..fd913d40 --- /dev/null +++ b/sde_collections/migrations/0076_deltatdammtagpattern.py @@ -0,0 +1,105 @@ +# Generated by Django 4.2.9 on 2025-02-14 23:33 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0075_alter_collection_reindexing_status_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="DeltaTdammTagPattern", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ( + "match_pattern", + models.CharField( + help_text="This pattern is compared against the URL of all documents in the collection", + verbose_name="Pattern", + ), + ), + ( + "match_pattern_type", + models.IntegerField(choices=[(1, "Individual URL Pattern"), (2, "Multi-URL Pattern")], default=1), + ), + ( + "tag", + models.CharField( + choices=[ + ("Not TDAMM", "Not TDAMM"), + ("MMA_M_EM", "Messenger - EM Radiation"), + ("MMA_M_EM_G", "Messenger - EM Radiation - Gamma rays"), + ("MMA_M_EM_X", "Messenger - EM Radiation - X-rays"), + ("MMA_M_EM_U", "Messenger - EM Radiation - Ultraviolet"), + ("MMA_M_EM_O", "Messenger - EM Radiation - Optical"), + ("MMA_M_EM_I", "Messenger - EM Radiation - Infrared"), + ("MMA_M_EM_M", "Messenger - EM Radiation - Microwave"), + ("MMA_M_EM_R", "Messenger - EM Radiation - Radio"), + ("MMA_M_G", "Messenger - Gravitational Waves"), + ("MMA_M_G_CBI", "Messenger - Gravitational Waves - Compact Binary Inspiral"), + ("MMA_M_G_S", "Messenger - Gravitational Waves - Stochastic"), + ("MMA_M_G_CON", "Messenger - Gravitational Waves - Continuous"), + ("MMA_M_G_B", "Messenger - Gravitational Waves - Burst"), + ("MMA_M_C", "Messenger - Cosmic Rays"), + ("MMA_M_N", "Messenger - Neutrinos"), + ("MMA_O_BI", "Objects - Binaries"), + 
("MMA_O_BI_BBH", "Objects - Binaries - Binary Black Holes"), + ("MMA_O_BI_BNS", "Objects - Binaries - Binary Neutron Stars"), + ("MMA_O_BI_C", "Objects - Binaries - Cataclysmic Variables"), + ("MMA_O_BI_N", "Objects - Binaries - Neutron Star-Black Hole"), + ("MMA_O_BI_B", "Objects - Binaries - Binary Pulsars"), + ("MMA_O_BI_W", "Objects - Binaries - White Dwarf Binaries"), + ("MMA_O_BH", "Objects - Black Holes"), + ("MMA_O_BH_AGN", "Objects - Black Holes - Active Galactic Nuclei"), + ("MMA_O_BH_IM", "Objects - Black Holes - Intermediate mass"), + ("MMA_O_BH_STM", "Objects - Black Holes - Stellar mass"), + ("MMA_O_BH_SUM", "Objects - Black Holes - Supermassive"), + ("MMA_O_E", "Objects - Exoplanets"), + ("MMA_O_N", "Objects - Neutron Stars"), + ("MMA_O_N_M", "Objects - Neutron Stars - Magnetars"), + ("MMA_O_N_P", "Objects - Neutron Stars - Pulsars"), + ("MMA_O_N_PWN", "Objects - Neutron Stars - Pulsar Wind Nebula"), + ("MMA_O_S", "Objects - Supernova Remnants"), + ("MMA_S_F", "Signals - Fast Radio Bursts"), + ("MMA_S_G", "Signals - Gamma-ray Bursts"), + ("MMA_S_K", "Signals - Kilonovae"), + ("MMA_S_N", "Signals - Novae"), + ("MMA_S_P", "Signals - Pevatrons"), + ("MMA_S_ST", "Signals - Stellar flares"), + ("MMA_S_SU", "Signals - Supernovae"), + ], + help_text="TDAMM tag to apply", + max_length=255, + ), + ), + ( + "source", + models.CharField( + choices=[("manual", "Manual"), ("ml", "Machine Learning")], default="manual", max_length=10 + ), + ), + ( + "collection", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="%(class)ss", + related_query_name="%(class)ss", + to="sde_collections.collection", + ), + ), + ("curated_urls", models.ManyToManyField(related_name="%(class)ss", to="sde_collections.curatedurl")), + ("delta_urls", models.ManyToManyField(related_name="%(class)ss", to="sde_collections.deltaurl")), + ], + options={ + "verbose_name": "Delta TDAMM Tag Pattern", + "verbose_name_plural": "Delta TDAMM Tag Patterns", + "ordering": 
["match_pattern"], + "abstract": False, + "unique_together": {("collection", "match_pattern")}, + }, + ), + ] diff --git a/sde_collections/migrations/0077_remove_deltatdammtagpattern_source_and_more.py b/sde_collections/migrations/0077_remove_deltatdammtagpattern_source_and_more.py new file mode 100644 index 00000000..9854de2e --- /dev/null +++ b/sde_collections/migrations/0077_remove_deltatdammtagpattern_source_and_more.py @@ -0,0 +1,72 @@ +# Generated by Django 4.2.9 on 2025-02-25 00:04 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0076_deltatdammtagpattern"), + ] + + operations = [ + migrations.RemoveField( + model_name="deltatdammtagpattern", + name="source", + ), + migrations.AddField( + model_name="deltatdammtagpattern", + name="operation", + field=models.IntegerField(choices=[(1, "Add Tag"), (2, "Remove Tag")], default=1), + ), + migrations.AlterField( + model_name="deltatdammtagpattern", + name="tag", + field=models.CharField( + choices=[ + ("Not TDAMM", "Not TDAMM"), + ("MMA_M_EM", "Messenger - EM Radiation"), + ("MMA_M_EM_G", "Messenger - EM Radiation - Gamma rays"), + ("MMA_M_EM_X", "Messenger - EM Radiation - X-rays"), + ("MMA_M_EM_U", "Messenger - EM Radiation - Ultraviolet"), + ("MMA_M_EM_O", "Messenger - EM Radiation - Optical"), + ("MMA_M_EM_I", "Messenger - EM Radiation - Infrared"), + ("MMA_M_EM_M", "Messenger - EM Radiation - Microwave"), + ("MMA_M_EM_R", "Messenger - EM Radiation - Radio"), + ("MMA_M_G", "Messenger - Gravitational Waves"), + ("MMA_M_G_CBI", "Messenger - Gravitational Waves - Compact Binary Inspiral"), + ("MMA_M_G_S", "Messenger - Gravitational Waves - Stochastic"), + ("MMA_M_G_CON", "Messenger - Gravitational Waves - Continuous"), + ("MMA_M_G_B", "Messenger - Gravitational Waves - Burst"), + ("MMA_M_C", "Messenger - Cosmic Rays"), + ("MMA_M_N", "Messenger - Neutrinos"), + ("MMA_O_BI", "Objects - Binaries"), + ("MMA_O_BI_BBH", "Objects - 
Binaries - Binary Black Holes"), + ("MMA_O_BI_BNS", "Objects - Binaries - Binary Neutron Stars"), + ("MMA_O_BI_C", "Objects - Binaries - Cataclysmic Variables"), + ("MMA_O_BI_N", "Objects - Binaries - Neutron Star-Black Hole"), + ("MMA_O_BI_B", "Objects - Binaries - Binary Pulsars"), + ("MMA_O_BI_W", "Objects - Binaries - White Dwarf Binaries"), + ("MMA_O_BH", "Objects - Black Holes"), + ("MMA_O_BH_AGN", "Objects - Black Holes - Active Galactic Nuclei"), + ("MMA_O_BH_IM", "Objects - Black Holes - Intermediate mass"), + ("MMA_O_BH_STM", "Objects - Black Holes - Stellar mass"), + ("MMA_O_BH_SUM", "Objects - Black Holes - Supermassive"), + ("MMA_O_E", "Objects - Exoplanets"), + ("MMA_O_N", "Objects - Neutron Stars"), + ("MMA_O_N_M", "Objects - Neutron Stars - Magnetars"), + ("MMA_O_N_P", "Objects - Neutron Stars - Pulsars"), + ("MMA_O_N_PWN", "Objects - Neutron Stars - Pulsar Wind Nebula"), + ("MMA_O_S", "Objects - Supernova Remnants"), + ("MMA_S_F", "Signals - Fast Radio Bursts"), + ("MMA_S_G", "Signals - Gamma-ray Bursts"), + ("MMA_S_K", "Signals - Kilonovae"), + ("MMA_S_N", "Signals - Novae"), + ("MMA_S_P", "Signals - Pevatrons"), + ("MMA_S_ST", "Signals - Stellar flares"), + ("MMA_S_SU", "Signals - Supernovae"), + ], + max_length=255, + ), + ), + ] diff --git a/sde_collections/migrations/0078_deltatdammtagpattern_source.py b/sde_collections/migrations/0078_deltatdammtagpattern_source.py new file mode 100644 index 00000000..28b00b87 --- /dev/null +++ b/sde_collections/migrations/0078_deltatdammtagpattern_source.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.9 on 2025-02-25 02:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0077_remove_deltatdammtagpattern_source_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="deltatdammtagpattern", + name="source", + field=models.CharField(default="manual", max_length=10), + ), + ] diff --git 
a/sde_collections/migrations/0079_alter_deltatdammtagpattern_unique_together.py b/sde_collections/migrations/0079_alter_deltatdammtagpattern_unique_together.py new file mode 100644 index 00000000..731b3da8 --- /dev/null +++ b/sde_collections/migrations/0079_alter_deltatdammtagpattern_unique_together.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.9 on 2025-02-27 02:38 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0078_deltatdammtagpattern_source"), + ] + + operations = [ + migrations.AlterUniqueTogether( + name="deltatdammtagpattern", + unique_together={("collection", "match_pattern", "tag", "operation", "source")}, + ), + ] diff --git a/sde_collections/migrations/0080_alter_deltatdammtagpattern_unique_together_and_more.py b/sde_collections/migrations/0080_alter_deltatdammtagpattern_unique_together_and_more.py new file mode 100644 index 00000000..2f9040ca --- /dev/null +++ b/sde_collections/migrations/0080_alter_deltatdammtagpattern_unique_together_and_more.py @@ -0,0 +1,78 @@ +# Generated by Django 4.2.9 on 2025-03-10 21:59 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0079_alter_deltatdammtagpattern_unique_together"), + ] + + operations = [ + migrations.AlterUniqueTogether( + name="deltatdammtagpattern", + unique_together={("collection", "match_pattern", "operation", "source")}, + ), + migrations.AddField( + model_name="deltatdammtagpattern", + name="tags", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.CharField( + choices=[ + ("Not TDAMM", "Not TDAMM"), + ("MMA_M_EM", "Messenger - EM Radiation"), + ("MMA_M_EM_G", "Messenger - EM Radiation - Gamma rays"), + ("MMA_M_EM_X", "Messenger - EM Radiation - X-rays"), + ("MMA_M_EM_U", "Messenger - EM Radiation - Ultraviolet"), + ("MMA_M_EM_O", "Messenger - EM Radiation - Optical"), + ("MMA_M_EM_I", 
"Messenger - EM Radiation - Infrared"), + ("MMA_M_EM_M", "Messenger - EM Radiation - Microwave"), + ("MMA_M_EM_R", "Messenger - EM Radiation - Radio"), + ("MMA_M_G", "Messenger - Gravitational Waves"), + ("MMA_M_G_CBI", "Messenger - Gravitational Waves - Compact Binary Inspiral"), + ("MMA_M_G_S", "Messenger - Gravitational Waves - Stochastic"), + ("MMA_M_G_CON", "Messenger - Gravitational Waves - Continuous"), + ("MMA_M_G_B", "Messenger - Gravitational Waves - Burst"), + ("MMA_M_C", "Messenger - Cosmic Rays"), + ("MMA_M_N", "Messenger - Neutrinos"), + ("MMA_O_BI", "Objects - Binaries"), + ("MMA_O_BI_BBH", "Objects - Binaries - Binary Black Holes"), + ("MMA_O_BI_BNS", "Objects - Binaries - Binary Neutron Stars"), + ("MMA_O_BI_C", "Objects - Binaries - Cataclysmic Variables"), + ("MMA_O_BI_N", "Objects - Binaries - Neutron Star-Black Hole"), + ("MMA_O_BI_B", "Objects - Binaries - Binary Pulsars"), + ("MMA_O_BI_W", "Objects - Binaries - White Dwarf Binaries"), + ("MMA_O_BH", "Objects - Black Holes"), + ("MMA_O_BH_AGN", "Objects - Black Holes - Active Galactic Nuclei"), + ("MMA_O_BH_IM", "Objects - Black Holes - Intermediate mass"), + ("MMA_O_BH_STM", "Objects - Black Holes - Stellar mass"), + ("MMA_O_BH_SUM", "Objects - Black Holes - Supermassive"), + ("MMA_O_E", "Objects - Exoplanets"), + ("MMA_O_N", "Objects - Neutron Stars"), + ("MMA_O_N_M", "Objects - Neutron Stars - Magnetars"), + ("MMA_O_N_P", "Objects - Neutron Stars - Pulsars"), + ("MMA_O_N_PWN", "Objects - Neutron Stars - Pulsar Wind Nebula"), + ("MMA_O_S", "Objects - Supernova Remnants"), + ("MMA_S_F", "Signals - Fast Radio Bursts"), + ("MMA_S_G", "Signals - Gamma-ray Bursts"), + ("MMA_S_K", "Signals - Kilonovae"), + ("MMA_S_N", "Signals - Novae"), + ("MMA_S_P", "Signals - Pevatrons"), + ("MMA_S_ST", "Signals - Stellar flares"), + ("MMA_S_SU", "Signals - Supernovae"), + ], + max_length=255, + ), + blank=True, + help_text="List of tags to add or remove", + null=True, + size=None, + ), + ), + 
migrations.RemoveField( + model_name="deltatdammtagpattern", + name="tag", + ), + ] diff --git a/sde_collections/migrations/0081_alter_deltatdammtagpattern_tags.py b/sde_collections/migrations/0081_alter_deltatdammtagpattern_tags.py new file mode 100644 index 00000000..5ea14e62 --- /dev/null +++ b/sde_collections/migrations/0081_alter_deltatdammtagpattern_tags.py @@ -0,0 +1,71 @@ +# Generated by Django 4.2.9 on 2025-03-10 23:56 + +import django.contrib.postgres.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0080_alter_deltatdammtagpattern_unique_together_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="deltatdammtagpattern", + name="tags", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.CharField( + choices=[ + ("Not TDAMM", "Not TDAMM"), + ("MMA_M_EM", "Messenger - EM Radiation"), + ("MMA_M_EM_G", "Messenger - EM Radiation - Gamma rays"), + ("MMA_M_EM_X", "Messenger - EM Radiation - X-rays"), + ("MMA_M_EM_U", "Messenger - EM Radiation - Ultraviolet"), + ("MMA_M_EM_O", "Messenger - EM Radiation - Optical"), + ("MMA_M_EM_I", "Messenger - EM Radiation - Infrared"), + ("MMA_M_EM_M", "Messenger - EM Radiation - Microwave"), + ("MMA_M_EM_R", "Messenger - EM Radiation - Radio"), + ("MMA_M_G", "Messenger - Gravitational Waves"), + ("MMA_M_G_CBI", "Messenger - Gravitational Waves - Compact Binary Inspiral"), + ("MMA_M_G_S", "Messenger - Gravitational Waves - Stochastic"), + ("MMA_M_G_CON", "Messenger - Gravitational Waves - Continuous"), + ("MMA_M_G_B", "Messenger - Gravitational Waves - Burst"), + ("MMA_M_C", "Messenger - Cosmic Rays"), + ("MMA_M_N", "Messenger - Neutrinos"), + ("MMA_O_BI", "Objects - Binaries"), + ("MMA_O_BI_BBH", "Objects - Binaries - Binary Black Holes"), + ("MMA_O_BI_BNS", "Objects - Binaries - Binary Neutron Stars"), + ("MMA_O_BI_C", "Objects - Binaries - Cataclysmic Variables"), + ("MMA_O_BI_N", "Objects - 
Binaries - Neutron Star-Black Hole"), + ("MMA_O_BI_B", "Objects - Binaries - Binary Pulsars"), + ("MMA_O_BI_W", "Objects - Binaries - White Dwarf Binaries"), + ("MMA_O_BH", "Objects - Black Holes"), + ("MMA_O_BH_AGN", "Objects - Black Holes - Active Galactic Nuclei"), + ("MMA_O_BH_IM", "Objects - Black Holes - Intermediate mass"), + ("MMA_O_BH_STM", "Objects - Black Holes - Stellar mass"), + ("MMA_O_BH_SUM", "Objects - Black Holes - Supermassive"), + ("MMA_O_E", "Objects - Exoplanets"), + ("MMA_O_N", "Objects - Neutron Stars"), + ("MMA_O_N_M", "Objects - Neutron Stars - Magnetars"), + ("MMA_O_N_P", "Objects - Neutron Stars - Pulsars"), + ("MMA_O_N_PWN", "Objects - Neutron Stars - Pulsar Wind Nebula"), + ("MMA_O_S", "Objects - Supernova Remnants"), + ("MMA_S_F", "Signals - Fast Radio Bursts"), + ("MMA_S_G", "Signals - Gamma-ray Bursts"), + ("MMA_S_K", "Signals - Kilonovae"), + ("MMA_S_N", "Signals - Novae"), + ("MMA_S_P", "Signals - Pevatrons"), + ("MMA_S_ST", "Signals - Stellar flares"), + ("MMA_S_SU", "Signals - Supernovae"), + ], + max_length=255, + ), + blank=True, + default=list, + help_text="List of tags to add or remove", + null=True, + size=None, + ), + ), + ] diff --git a/sde_collections/migrations/0082_merge_20250402_1127.py b/sde_collections/migrations/0082_merge_20250402_1127.py new file mode 100644 index 00000000..f4c582a4 --- /dev/null +++ b/sde_collections/migrations/0082_merge_20250402_1127.py @@ -0,0 +1,13 @@ +# Generated by Django 4.2.9 on 2025-04-02 16:27 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("sde_collections", "0077_alter_candidateurl_tdamm_tag_manual_and_more"), + ("sde_collections", "0081_alter_deltatdammtagpattern_tags"), + ] + + operations = [] diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py index 097c5ce7..5a3209c8 100644 --- a/sde_collections/models/collection.py +++ b/sde_collections/models/collection.py @@ -267,6 +267,24 @@ def 
add_to_public_query(self):
         scraper_content = scraper_editor.update_config_xml()
         gh.create_or_update_file(query_path, scraper_content)
 
+    def has_tdamm_tags(self):
+        """Check if any URLs in this collection have TDAMM tags.
+
+        A tag column holding NULL or an empty list counts as having no tags.
+        """
+        # Test each tag column on its own, so that a row such as
+        # tdamm_tag_manual=[] with tdamm_tag_ml=NULL is not reported as tagged.
+        has_manual = models.Q(tdamm_tag_manual__isnull=False) & ~models.Q(tdamm_tag_manual=[])
+        has_ml = models.Q(tdamm_tag_ml__isnull=False) & ~models.Q(tdamm_tag_ml=[])
+        tagged = has_manual | has_ml
+
+        # Check DeltaUrls
+        if self.delta_urls.filter(tagged).exists():
+            return True
+
+        # Check CuratedUrls
+        return self.curated_urls.filter(tagged).exists()
+
     @property
     def _scraper_config_path(self) -> str:
         return f"sources/scrapers/{self.config_folder}/default.xml"
@@ -661,6 +679,12 @@ def apply_all_patterns(self):
         for pattern in self.deltadivisionpatterns.all():
             pattern.apply()
 
+        for pattern in self.deltatdammtagpatterns.filter(operation=1).order_by("id"):
+            pattern.apply()
+
+        for pattern in self.deltatdammtagpatterns.filter(operation=2).order_by("id"):
+            pattern.apply()
+
     def generate_inference_job(self, classification_type):
         """Creates a new inference job for a collection."""
 
diff --git a/sde_collections/models/collection_choice_fields.py b/sde_collections/models/collection_choice_fields.py
index a433317a..08020007 100644
--- a/sde_collections/models/collection_choice_fields.py
+++ b/sde_collections/models/collection_choice_fields.py
@@ -165,3 +165,10 @@ def lookup_by_text(cls, text: str) -> str | None:
             if choice[1].lower() == text.lower():
                 return choice[0]
         return None
+
+
+class OperationChoices(models.IntegerChoices):
+    """Operation choices for the TDAMM tag pattern model."""
+
+    ADD = 1, "Add Tag"
+    REMOVE = 2, "Remove Tag"
diff --git a/sde_collections/models/delta_patterns.py b/sde_collections/models/delta_patterns.py
index 76f5b7b5..a0f361e3 100644
--- 
a/sde_collections/models/delta_patterns.py
+++ b/sde_collections/models/delta_patterns.py
@@ -2,6 +2,7 @@
 from typing import Any
 
 from django.apps import apps
+from django.contrib.postgres.fields import ArrayField
 from django.core.exceptions import ValidationError
 from django.db import models
 
@@ -11,7 +12,12 @@
     resolve_title,
     validate_fstring,
 )
-from .collection_choice_fields import Divisions, DocumentTypes
+from .collection_choice_fields import (
+    Divisions,
+    DocumentTypes,
+    OperationChoices,
+    TDAMMTags,
+)
 
 
 class BaseMatchPattern(models.Model):
@@ -691,3 +697,220 @@ def save(self, *args, **kwargs):
 class DeltaResolvedTitleError(DeltaResolvedTitleBase):
     error_string = models.TextField(null=False, blank=False)
     http_status_code = models.IntegerField(null=True, blank=True)
+
+
+class DeltaTdammTagPattern(BaseMatchPattern):
+    """Pattern for adding or removing TDAMM tags."""
+
+    tags = ArrayField(
+        models.CharField(max_length=255, choices=TDAMMTags.choices),
+        blank=True,
+        null=True,
+        default=list,
+        help_text="List of tags to add or remove",
+    )
+    operation = models.IntegerField(choices=OperationChoices.choices, default=OperationChoices.ADD)
+    source = models.CharField(max_length=10, default="manual")
+
+    def save(self, *args, **kwargs):
+        """Save the pattern, storing a missing tag list as an empty list."""
+        # Ensure tags are never None
+        if self.tags is None:
+            self.tags = []
+        super().save(*args, **kwargs)
+
+    def apply(self):
+        """
+        Apply tag operation to matching URLs.
+        Similar to FieldModifyingPattern.apply() but with custom tag handling.
+        """
+        DeltaUrl = apps.get_model("sde_collections", "DeltaUrl")
+
+        # Get newly matching Curated URLs
+        matching_curated_urls = self.get_matching_curated_urls()
+        previously_unaffected_curated = matching_curated_urls.exclude(
+            id__in=self.curated_urls.values_list("id", flat=True)
+        )
+
+        # Process each unaffected Curated URL
+        for curated_url in previously_unaffected_curated:
+            if not self.is_most_distinctive_pattern(curated_url):
+                continue
+
+            # Skip URLs whose tags the operation would leave unchanged
+            if not self._would_operation_change_tags(curated_url):
+                continue
+
+            # Only create a Delta when none exists yet for this URL
+            if DeltaUrl.objects.filter(url=curated_url.url, collection=self.collection).exists():
+                continue
+
+            # Create new Delta URL with copied fields
+            fields = {
+                field.name: getattr(curated_url, field.name)
+                for field in curated_url._meta.fields
+                if field.name not in ["id", "collection"]
+            }
+            fields["to_delete"] = False
+            fields["collection"] = self.collection
+
+            delta = DeltaUrl.objects.create(**fields)
+
+            # Apply tag operation
+            self._apply_tag_operation(delta)
+
+        # Update tags for all matching Delta URLs
+        for delta_url in self.get_matching_delta_urls():
+            if self.is_most_distinctive_pattern(delta_url):
+                self._apply_tag_operation(delta_url)
+
+        # Update pattern relationships
+        self.update_affected_delta_urls_list()
+        self.update_affected_curated_urls_list()
+
+    def unapply(self):
+        """Remove tag operation effects with comprehensive handling for multiple tags."""
+        CuratedUrl = apps.get_model("sde_collections", "CuratedUrl")
+
+        # Store affected URLs
+        affected_urls = list(self.delta_urls.all())
+
+        # Clear relationship before deleting to avoid cascade issues
+        self.delta_urls.clear()
+        self.curated_urls.clear()
+
+        # Process each affected URL
+        for delta_url in affected_urls:
+            current_tags = delta_url.tdamm_tag_manual or []
+
+            # If it was an ADD pattern, remove the tags it added.
+            # A REMOVE pattern needs no direct undo: the refresh below rebuilds
+            # the tags from the patterns that remain linked to this URL.
+            if self.operation == OperationChoices.ADD and self.tags:
+                new_tags = [t for t in current_tags if t not in self.tags]
+                if new_tags != current_tags:
+                    delta_url.tdamm_tag_manual = new_tags if new_tags else None
+                    delta_url.save()
+
+            # Refresh all patterns for this URL to ensure consistency
+            self._refresh_url_tags(delta_url)
+
+            # Check for cleanup
+            try:
+                curated_url = CuratedUrl.objects.get(collection=delta_url.collection, url=delta_url.url)
+            except CuratedUrl.DoesNotExist:
+                continue
+            if not delta_url.to_delete and delta_url._fields_match(curated_url):
+                delta_url.delete()
+
+    def _tags_from_other_patterns(self, url_obj, operation):
+        """Collect the tags of other patterns of ``operation`` linked to ``url_obj``."""
+        other_patterns = DeltaTdammTagPattern.objects.filter(
+            collection=self.collection, delta_urls=url_obj, operation=operation
+        ).exclude(id=self.id)
+
+        tags = set()
+        for pattern in other_patterns:
+            tags.update(pattern.tags or [])
+        return tags
+
+    def _refresh_url_tags(self, url_obj):
+        """Refresh URL tags by reapplying all other patterns affecting this URL."""
+        add_tags = self._tags_from_other_patterns(url_obj, OperationChoices.ADD)
+        remove_tags = self._tags_from_other_patterns(url_obj, OperationChoices.REMOVE)
+
+        # Final tags = (add tags) - (remove tags); sorted so the stored order is deterministic
+        final_tags = sorted(add_tags - remove_tags)
+
+        # Update URL tags
+        if set(url_obj.tdamm_tag_manual or []) != set(final_tags):
+            url_obj.tdamm_tag_manual = final_tags if final_tags else None
+            url_obj.save()
+
+    def _would_operation_change_tags(self, url_obj):
+        """Determine if applying the operation would change the tags."""
+        if not self.tags:
+            return False
+
+        manual_tags = url_obj.tdamm_tag_manual or []
+
+        if self.operation == OperationChoices.ADD:
+            # Would only change if at least one tag is not in manual tags
+            return any(tag not in manual_tags for tag in self.tags)
+
+        if self.operation == OperationChoices.REMOVE:
+            # Manual tags are checked when present; ML tags only when there are no manual tags
+            effective_tags = manual_tags or url_obj.tdamm_tag_ml or []
+            return any(tag in effective_tags for tag in self.tags)
+
+        # Unknown operation: nothing to change (always return a bool)
+        return False
+
+    def _apply_tag_operation(self, url_obj):
+        """Apply tag operations to the URL with proper synchronization."""
+        manual_tags = url_obj.tdamm_tag_manual or []
+        pattern_tags = self.tags or []
+        changed = False
+
+        if self.operation == OperationChoices.ADD:
+            # For ADD operations, synchronize: the URL ends up with the tags of
+            # every ADD pattern linked to it, minus those of linked REMOVE patterns.
+            add_tags = set(pattern_tags) | self._tags_from_other_patterns(url_obj, OperationChoices.ADD)
+            remove_tags = self._tags_from_other_patterns(url_obj, OperationChoices.REMOVE)
+            final_tags = sorted(add_tags - remove_tags)
+
+            # Update if different from current tags
+            if set(final_tags) != set(manual_tags):
+                url_obj.tdamm_tag_manual = final_tags
+                changed = True
+
+        elif self.operation == OperationChoices.REMOVE:
+            # For REMOVE operations, just remove the tags
+            new_tags = [t for t in manual_tags if t not in pattern_tags]
+            if len(new_tags) != len(manual_tags):
+                url_obj.tdamm_tag_manual = new_tags if new_tags else None
+                changed = True
+
+        # Save changes
+        if changed:
+            url_obj.save()
+            if hasattr(url_obj, "_cleanup_if_needed"):
+                url_obj._cleanup_if_needed()
+
+    def _revert_tag_operation(self, url_obj):
+        """Revert the effects of the tag operation."""
+        pattern_tags = self.tags or []
+        ml_tags = url_obj.tdamm_tag_ml or []
+        manual_tags = list(url_obj.tdamm_tag_manual or [])
+        changed = False
+
+        if self.operation == OperationChoices.ADD:
+            # If we added tags, remove them
+            remaining = [t for t in manual_tags if t not in pattern_tags]
+            if len(remaining) != len(manual_tags):
+                url_obj.tdamm_tag_manual = remaining
+                url_obj.save()
+
+        elif self.operation == OperationChoices.REMOVE:
+            # If we removed tags, add back those that are in the ML tags
+            restorable = [t for t in pattern_tags if t in ml_tags]
+            if restorable and not manual_tags:
+                manual_tags = list(ml_tags)
+                changed = True
+            else:
+                for tag in restorable:
+                    if tag not in manual_tags:
+                        manual_tags.append(tag)
+                        changed = True
+
+            if changed:
+                url_obj.tdamm_tag_manual = manual_tags
+                url_obj.save()
+
+        if hasattr(url_obj, "_cleanup_if_needed"):
+            url_obj._cleanup_if_needed()
+
+    class Meta(BaseMatchPattern.Meta):
+        verbose_name = "Delta TDAMM Tag Pattern"
+        verbose_name_plural = "Delta TDAMM Tag Patterns"
+        unique_together = ("collection", "match_pattern", "operation", "source")
diff --git a/sde_collections/models/delta_url.py b/sde_collections/models/delta_url.py
index 88df502b..b58e9927 100644
--- a/sde_collections/models/delta_url.py
+++ b/sde_collections/models/delta_url.py
@@ -134,6 +134,84 @@ def splits(self) -> list[tuple[str, str]]:
             parts.append((part_string, part))
         return parts
 
+    def 
get_tag_source(self): + """Returns the source of the TDAMM tags: 'manual', 'ml', or 'Not Set'""" + # Convert None to empty list for comparison + manual_tags = self.tdamm_tag_manual or [] + ml_tags = self.tdamm_tag_ml or [] + + if manual_tags and manual_tags != []: + return "manual" + elif ml_tags and ml_tags != []: + return "ml" + + return "Not Set" + + def _fields_match(self, other): + """Compare fields between two URL objects.""" + fields_to_compare = [ + "scraped_title", + "scraped_text", + "generated_title", + "visited", + "document_type", + "division", + ] + + # Regular field comparison + basic_match = all(getattr(self, field) == getattr(other, field) for field in fields_to_compare) + + # Special handling for tag fields - treat [] and None as equivalent + def tags_equivalent(a, b): + if not a and not b: # Both are empty (None or []) + return True + return a == b + + # Compare tag fields with special handling + tags_match = tags_equivalent(self.tdamm_tag_manual, other.tdamm_tag_manual) and tags_equivalent( + self.tdamm_tag_ml, other.tdamm_tag_ml + ) + + return basic_match and tags_match + + def add_tag(self, tag: str, source: str) -> None: + """Add a tag and handle cleanup if needed.""" + if source == "ml": + current_tags = self.tdamm_tag_ml or [] + new_tags = list(current_tags) + if tag not in new_tags: + new_tags.append(tag) + self.tdamm_tag_manual = new_tags + else: + current_tags = self.tdamm_tag_manual or [] + if tag not in current_tags: + current_tags.append(tag) + self.tdamm_tag_manual = current_tags + + self.save() + self._cleanup_if_needed() + + def remove_tag(self, tag: str, source: str) -> None: + """Remove a tag and handle cleanup if needed.""" + if source == "ml": + ml_tags = self.tdamm_tag_ml + if ml_tags: + new_manual_tags = [t for t in ml_tags if t != tag] + self.tdamm_tag_manual = new_manual_tags + else: + if self.tdamm_tag_manual: + manual_tags = self.tdamm_tag_manual + if tag in manual_tags: + manual_tags.remove(tag) + self.tdamm_tag_manual = 
def add_tag(self, tag: str, source: str) -> None:
    """Create/update the matching DeltaUrl and add ``tag`` to it.

    The tag is never written to the curated row itself; it is applied to the
    delta returned by ``_create_or_update_delta()``. If the delta then has the
    same ``tdamm_tag`` value as this curated URL and is not flagged
    ``to_delete``, the delta is removed again.

    Args:
        tag: TDAMM tag code to add.
        source: ``"ml"`` or any other value (treated as manual), forwarded
            unchanged to the delta's ``add_tag``.
    """
    delta_url = self._create_or_update_delta()
    if not delta_url:
        return

    delta_url.add_tag(tag, source)

    # The delta's add_tag() ends with _cleanup_if_needed(), which may already
    # have deleted the row. Django clears the primary key of a deleted
    # instance and raises ValueError on a second delete(), so stop here.
    if delta_url.pk is None:
        return

    if not delta_url.to_delete and delta_url.tdamm_tag == self.tdamm_tag:
        delta_url.delete()
def get_tdamm_tag(self, obj):
    """Return ``obj.tdamm_tag``, substituting an empty list when it is None."""
    effective_tags = obj.tdamm_tag
    if effective_tags is None:
        return []
    return effective_tags
class TdammTagPatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
    """Serializer for ``DeltaTdammTagPattern``.

    Extends the base pattern fields with the pattern's tag list, its
    operation and source, plus read-only display variants of the tags and
    the operation.
    """

    tags_display = serializers.SerializerMethodField(read_only=True)
    operation_display = serializers.CharField(source="get_operation_display", read_only=True)

    class Meta:
        model = DeltaTdammTagPattern
        fields = BasePatternSerializer.Meta.fields + (
            "tags",
            "tags_display",
            "operation",
            "operation_display",
            "source",
        )

    def create(self, validated_data):
        """Create the pattern, defaulting a missing or None ``tags`` to ``[]``."""
        if validated_data.get("tags") is None:
            validated_data["tags"] = []
        return super().create(validated_data)

    def get_tags_display(self, obj):
        """Return the display label for each tag; unknown codes are returned as-is."""
        if not obj.tags:
            return []
        # NOTE(review): TDAMMTags must be importable in this module; the import
        # hunk visible in this change only adds DeltaTdammTagPattern - confirm.
        # Build the code -> label map once instead of once per tag.
        labels = dict(TDAMMTags.choices)
        return [labels.get(tag, tag) for tag in obj.tags]
def test_remove_tag_method(self):
    """Test the remove_tag method for different sources"""
    url = DeltaUrlFactory()

    # Prepare initial tags
    url.tdamm_tag_ml = ["MMA_M_EM", "MMA_O_N"]

    # Confirm no manual tags
    assert url.tdamm_tag_manual is None

    # Removing with source "ml" filters the ML list into the manual tags and
    # leaves the ML list itself untouched
    url.remove_tag("MMA_M_EM", "ml")
    assert url.tdamm_tag_ml == ["MMA_M_EM", "MMA_O_N"]
    assert url.tdamm_tag_manual == ["MMA_O_N"]

    # Removing with source "manual" edits the manual list directly
    url.remove_tag("MMA_O_N", "manual")
    assert url.tdamm_tag_manual == []

    # Default to ML tags if manual tags are empty
    assert url.tdamm_tag == ["MMA_M_EM", "MMA_O_N"]
    assert url.tdamm_tag_ml == ["MMA_M_EM", "MMA_O_N"]
def test_tag_operations_with_none_values(self):
    """Test tag operations when fields are None"""
    url = DeltaUrlFactory()

    # Add tag when both fields are None
    url.add_tag("MMA_M_EM", "manual")
    assert url.tdamm_tag_manual == ["MMA_M_EM"]

    # Removing from a URL with no manual tags must leave the field as None
    url = DeltaUrlFactory()
    url.remove_tag("MMA_M_EM", "manual")
    assert url.tdamm_tag_manual is None
def test_curated_url_tag_operations(self):
    """Test that CuratedUrl tag changes create/update DeltaUrl"""
    collection = CollectionFactory()
    curated = CuratedUrl.objects.create(collection=collection, url="https://example.com", tdamm_tag_manual=[])

    # Initial state
    curated.tdamm_tag_manual = []
    curated.save()

    # Adding tag should create DeltaUrl with different tags
    curated.add_tag("MMA_M_EM", "manual")
    delta = DeltaUrl.objects.get(url=curated.url)
    assert delta.tdamm_tag_manual == ["MMA_M_EM"]

    # Adding another tag should update existing DeltaUrl
    curated.add_tag("MMA_M_G", "manual")
    delta.refresh_from_db()
    # Let the database count the rows instead of loading them all
    assert DeltaUrl.objects.filter(url=curated.url).count() == 1
    # NOTE(review): the delta ends up holding only the latest tag, presumably
    # because each CuratedUrl.add_tag() call rebuilds the delta from the
    # curated row (whose manual tags stay empty) - confirm this is intended.
    assert delta.tdamm_tag_manual == ["MMA_M_G"]
def test_tag_source_transitions(self):
    """Check that the reported tag source follows the manual/ML tag state."""
    delta = DeltaUrlFactory()

    # Only ML tags present -> source is reported as "ml"
    delta.tdamm_tag_ml = ["MMA_M_EM"]
    source_with_ml_only = delta.get_tag_source()
    assert source_with_ml_only == "ml"

    # A manual tag takes precedence over the ML tags
    delta.add_tag("MMA_M_G", "manual")
    source_after_manual_add = delta.get_tag_source()
    assert source_after_manual_add == "manual"

    # Emptying the manual tags falls back to the ML source
    delta.tdamm_tag_manual = []
    source_after_clear = delta.get_tag_source()
    assert source_after_clear == "ml"
import ( @@ -34,6 +38,7 @@ DeltaIncludePattern, DeltaResolvedTitle, DeltaResolvedTitleError, + DeltaTdammTagPattern, DeltaTitlePattern, ) from .models.delta_url import CuratedUrl, DeltaUrl @@ -49,12 +54,14 @@ DocumentTypePatternSerializer, ExcludePatternSerializer, IncludePatternSerializer, + TdammTagPatternSerializer, TitlePatternSerializer, ) from .tasks import push_to_github_task from .utils.health_check import generate_db_github_metadata_differences User = get_user_model() +logger = logging.getLogger(__name__) class CollectionListView(LoginRequiredMixin, ListView): @@ -233,6 +240,16 @@ def get_context_data(self, **kwargs): context["workflow_status_choices"] = WorkflowStatusChoices context["reindexing_status_choices"] = ReindexingStatusChoices context["is_multi_division"] = self.collection.is_multi_division + context["has_tdamm_tags"] = self.collection.has_tdamm_tags() + + tdamm_choices = [ + {"code": choice[0], "label": choice[1], "display": f"{choice[0]}: {choice[1]}"} + for choice in TDAMMTags.choices + # if choice[0] != 'Not TDAMM' + ] + context["tdamm_choices"] = tdamm_choices + + # print("TDAMM choices:", context['tdamm_choices']) return context @@ -314,6 +331,355 @@ def update_division(self, request, pk=None): return Response(status=status.HTTP_200_OK) return Response(status=status.HTTP_400_BAD_REQUEST, data={"error": "Division is required."}) + # @action(detail=True, methods=["post"], url_path="add_tag") + # def add_tag(self, request, pk=None): + # delta_url = self.get_object() + # tag = request.data.get("tag") + # source = request.data.get("source", "manual") + + # if not tag: + # return Response({"error": "Tag not specified"}, status=400) + + # try: + # # Create or get a pattern for this specific URL + # pattern, created = DeltaTdammTagPattern.objects.get_or_create( + # collection=delta_url.collection, + # match_pattern=delta_url.url, + # match_pattern_type=1, + # # tag=tag, + # operation=OperationChoices.ADD, + # source=source, + # defaults={"tags": 
@action(detail=True, methods=["post"], url_path="add_tag")
def add_tag(self, request, pk=None):
    """Add one TDAMM tag to this delta URL and sync its per-URL tag patterns.

    Request body: ``tag`` (required) and ``source`` (defaults to ``"manual"``).

    Steps:
      1. Build the working tag list: the URL's manual tags, or a copy of its
         ML tags when no manual tags exist; append ``tag`` and save the URL
         if the tag was not already present.
      2. Merge the working tags into the first individual-URL ADD pattern for
         this URL (creating one if none exists) and delete duplicate patterns.
      3. Strip ``tag`` from any individual-URL REMOVE pattern for this URL,
         deleting patterns that become empty.

    Returns:
        400 if ``tag`` is missing, 500 on an unexpected error, otherwise
        ``{"status": "success"}``.
    """
    delta_url = self.get_object()
    tag = request.data.get("tag")
    source = request.data.get("source", "manual")

    if not tag:
        return Response({"error": "Tag not specified"}, status=400)

    try:
        # Get current manual and ML tags
        manual_tags = delta_url.tdamm_tag_manual
        ml_tags = delta_url.tdamm_tag_ml or []

        logger.info(f"Adding tag '{tag}' to URL: {delta_url.url}")
        logger.info(f"Current manual tags: {manual_tags}")
        logger.info(f"Current ML tags: {ml_tags}")

        # STEP 1: Update URL's manual tags

        if not manual_tags and ml_tags:
            # No manual tags, but ML tags exist - start from a copy of the ML tags
            logger.info("No manual tags, but ML tags exist - copying ML tags to manual")
            working_tags = list(ml_tags)
        else:
            # Empty or existing manual tags
            working_tags = list(manual_tags) if manual_tags is not None else []

        # Add the new tag if not already present
        if tag not in working_tags:
            working_tags.append(tag)
            delta_url.tdamm_tag_manual = working_tags
            delta_url.save()
            logger.info(f"Updated URL tags to: {working_tags}")

        # STEP 2: Find or update ADD pattern

        existing_patterns = list(
            DeltaTdammTagPattern.objects.filter(
                collection=delta_url.collection,
                match_pattern=delta_url.url,
                match_pattern_type=1,
                operation=OperationChoices.ADD,
            )
        )

        logger.info(f"Found {len(existing_patterns)} existing ADD patterns")

        if existing_patterns:
            # Use the first pattern found
            add_pattern = existing_patterns[0]
            logger.info(f"Updating existing pattern {add_pattern.id}, current tags: {add_pattern.tags}")

            # Union of the pattern's tags and the working tags. dict.fromkeys
            # de-duplicates while keeping first-seen order, so the stored list
            # is deterministic (a set would shuffle it between runs).
            add_pattern.tags = list(dict.fromkeys([*(add_pattern.tags or []), *working_tags]))
            add_pattern.source = source  # Update source to match current request
            add_pattern.save()
            logger.info(f"Updated pattern tags to: {add_pattern.tags}")

            # Delete any other duplicate patterns
            for pattern in existing_patterns[1:]:
                logger.info(f"Deleting duplicate pattern {pattern.id}")
                pattern.delete()
        else:
            # Create a new pattern with all working tags
            logger.info(f"Creating new ADD pattern with tags: {working_tags}")
            add_pattern = DeltaTdammTagPattern.objects.create(
                collection=delta_url.collection,
                match_pattern=delta_url.url,
                match_pattern_type=1,
                operation=OperationChoices.ADD,
                source=source,
                tags=working_tags,
            )
            logger.info(f"Created pattern {add_pattern.id}")

        # STEP 3: Check for and update any REMOVE patterns

        remove_patterns = DeltaTdammTagPattern.objects.filter(
            collection=delta_url.collection,
            match_pattern=delta_url.url,
            match_pattern_type=1,
            operation=OperationChoices.REMOVE,
        )

        for remove_pattern in remove_patterns:
            if remove_pattern.tags and tag in remove_pattern.tags:
                remove_pattern.tags = [t for t in remove_pattern.tags if t != tag]
                if not remove_pattern.tags:
                    logger.info(f"Deleting empty REMOVE pattern {remove_pattern.id}")
                    remove_pattern.delete()
                else:
                    logger.info(f"Updated REMOVE pattern {remove_pattern.id} to {remove_pattern.tags}")
                    remove_pattern.save()

        return Response({"status": "success"})
    except Exception:
        # API boundary: record the full traceback server-side, but never echo
        # exception text (which can expose internals) back to the client.
        logger.exception("Error occurred while adding a TDAMM tag")
        return Response({"error": "An internal error has occurred."}, status=500)
@action(detail=True, methods=["post"], url_path="remove_tag")
def remove_tag(self, request, pk=None):
    """Remove one TDAMM tag from this delta URL and sync its per-URL patterns.

    Request body: ``tag`` (required) and ``source`` (defaults to ``"manual"``).

    Steps:
      1. Strip ``tag`` from every individual-URL ADD pattern of this source,
         deleting patterns that become empty.
      2. Strip ``tag`` from the URL's manual tags (stored as None when the
         list becomes empty).
      3. If an ADD pattern was modified, stop. Otherwise, when ``tag`` is one
         of the URL's ML tags, record it in a REMOVE pattern (updating the
         first existing one or creating a new one), then run the URL's
         cleanup hook.

    Returns:
        400 if ``tag`` is missing, 500 on an unexpected error, otherwise
        ``{"status": "success"}``.
    """
    delta_url = self.get_object()
    tag = request.data.get("tag")
    source = request.data.get("source", "manual")

    if not tag:
        return Response({"error": "Tag not specified"}, status=400)

    try:
        # Get current tags
        manual_tags = delta_url.tdamm_tag_manual or []
        ml_tags = delta_url.tdamm_tag_ml or []

        logger.info(f"Looking for patterns matching URL: {delta_url.url}")

        # Find ALL add patterns that might contain this tag
        add_patterns = list(
            DeltaTdammTagPattern.objects.filter(
                collection=delta_url.collection,
                match_pattern=delta_url.url,
                match_pattern_type=1,
                operation=OperationChoices.ADD,
                source=source,
            )
        )

        logger.info(f"Found {len(add_patterns)} ADD patterns")
        for pat in add_patterns:
            logger.info(f"ADD pattern {pat.id}: tags={pat.tags}")

        # Flag to track if we modified an ADD pattern
        modified_add_pattern = False

        # Check ALL add patterns (there might be duplicates)
        for add_pattern in add_patterns:
            if add_pattern.tags and tag in add_pattern.tags:
                logger.info(f"Modifying ADD pattern {add_pattern.id}")

                # Remove the tag from the pattern's tags
                add_pattern.tags = [t for t in add_pattern.tags if t != tag]

                # Save or delete the pattern
                if not add_pattern.tags:
                    logger.info(f"Deleting empty ADD pattern {add_pattern.id}")
                    add_pattern.delete()
                else:
                    logger.info(f"Saving ADD pattern {add_pattern.id} with tags {add_pattern.tags}")
                    add_pattern.save()

                modified_add_pattern = True

        # Always update the URL's tags to remove the tag
        if tag in manual_tags:
            new_tags = [t for t in manual_tags if t != tag]
            # Set to None if empty, not empty list
            delta_url.tdamm_tag_manual = new_tags if new_tags else None
            delta_url.save()
            logger.info(f"Updated URL tags: {delta_url.tdamm_tag_manual}")

        # If we modified an ADD pattern, we're done - no need for a REMOVE pattern
        # NOTE(review): this early return also skips the cleanup hook below -
        # confirm that is intended.
        if modified_add_pattern:
            return Response({"status": "success"})

        # Only create a REMOVE pattern if tag is in ML tags
        if tag in ml_tags:
            logger.info(f"Creating/updating REMOVE pattern for ML tag {tag}")

            # Find existing REMOVE patterns
            remove_patterns = list(
                DeltaTdammTagPattern.objects.filter(
                    collection=delta_url.collection,
                    match_pattern=delta_url.url,
                    match_pattern_type=1,
                    operation=OperationChoices.REMOVE,
                    source=source,
                )
            )

            logger.info(f"Found {len(remove_patterns)} REMOVE patterns")

            if remove_patterns:
                # Update the first REMOVE pattern
                remove_pattern = remove_patterns[0]
                current_tags = remove_pattern.tags or []
                if tag not in current_tags:
                    remove_pattern.tags = current_tags + [tag]
                    remove_pattern.save()
                    logger.info(f"Updated REMOVE pattern {remove_pattern.id} with tags {remove_pattern.tags}")
            else:
                # Create new REMOVE pattern
                remove_pattern = DeltaTdammTagPattern.objects.create(
                    collection=delta_url.collection,
                    match_pattern=delta_url.url,
                    match_pattern_type=1,
                    operation=OperationChoices.REMOVE,
                    source=source,
                    tags=[tag],
                )
                logger.info(f"Created new REMOVE pattern {remove_pattern.id} with tag {tag}")

        # Call cleanup if needed
        if hasattr(delta_url, "_cleanup_if_needed"):
            delta_url._cleanup_if_needed()

        return Response({"status": "success"})
    except Exception:
        # API boundary: record the full traceback server-side, but never echo
        # exception text (which can expose internals) back to the client.
        logger.exception("Error occurred while removing a TDAMM tag")
        return Response({"error": "An internal error has occurred."}, status=500)
collection=url_obj.collection, delta_urls=url_obj, operation=OperationChoices.ADD + # ) + + # remove_patterns = DeltaTdammTagPattern.objects.filter( + # collection=url_obj.collection, delta_urls=url_obj, operation=OperationChoices.REMOVE + # ) + + # # Collect all tags from ADD patterns + # add_tags = set() + # for pattern in add_patterns: + # add_tags.update(pattern.tags or []) + + # # Collect all tags from REMOVE patterns + # remove_tags = set() + # for pattern in remove_patterns: + # remove_tags.update(pattern.tags or []) + + # # Final tags = (add tags) - (remove tags) + # final_tags = list(add_tags - remove_tags) + + # # Update URL tags + # url_obj.tdamm_tag_manual = final_tags if final_tags else None + # url_obj.save() + class CuratedURLViewSet(CollectionFilterMixin, viewsets.ModelViewSet): queryset = CuratedUrl.objects.all() @@ -344,6 +710,393 @@ def update_division(self, request, pk=None): return Response(status=status.HTTP_200_OK) return Response(status=status.HTTP_400_BAD_REQUEST, data={"error": "Division is required."}) + # @action(detail=True, methods=["post"], url_path="add_tag") + # def add_tag(self, request, pk=None): + # curated_url = self.get_object() + # tag = request.data.get("tag") + # source = request.data.get("source", "manual") + + # if not tag: + # return Response({"error": "Tag not specified"}, status=400) + + # try: + # # Create delta URL first + # delta_url = curated_url._create_or_update_delta() + + # # Then apply tag operations to the delta + # pattern, created = DeltaTdammTagPattern.objects.get_or_create( + # collection=curated_url.collection, + # match_pattern=curated_url.url, + # match_pattern_type=1, + # operation=OperationChoices.ADD, + # source=source, + # defaults={"tags": [tag]}, + # ) + + # if not created: + # # Add tag if not already present + # current_tags = pattern.tags or [] + # if tag not in current_tags: + # pattern.tags = current_tags + [tag] + # pattern.save() + + # if not created: + # # Add tag if not already present + 
@action(detail=True, methods=["post"], url_path="add_tag")
def add_tag(self, request, pk=None):
    """Add one TDAMM tag to a curated URL by way of its delta URL.

    Request body: ``tag`` (required) and ``source`` (defaults to ``"manual"``).

    The curated row is not modified. A delta URL is created or refreshed via
    ``_create_or_update_delta()`` and the tag is applied there:
      1. Build the working tag list: the delta's manual tags, or a copy of
         the curated URL's ML tags when no manual tags exist; append ``tag``
         and save the delta if the tag was not already present.
      2. Merge the working tags into the first individual-URL ADD pattern for
         this URL (creating one if none exists) and delete duplicate patterns.
      3. Strip ``tag`` from any individual-URL REMOVE pattern for this URL,
         deleting patterns that become empty.
      4. Delete the delta again if it is not flagged ``to_delete`` and its
         fields match the curated URL.

    Returns:
        400 if ``tag`` is missing, 500 on an unexpected error, otherwise
        ``{"status": "success"}``.
    """
    curated_url = self.get_object()
    tag = request.data.get("tag")
    source = request.data.get("source", "manual")

    if not tag:
        return Response({"error": "Tag not specified"}, status=400)

    try:
        # Create delta URL first
        delta_url = curated_url._create_or_update_delta()

        # Get current manual and ML tags
        manual_tags = delta_url.tdamm_tag_manual
        ml_tags = curated_url.tdamm_tag_ml or []

        logger.info(f"Adding tag '{tag}' to URL: {curated_url.url}")
        logger.info(f"Current manual tags: {manual_tags}")
        logger.info(f"Current ML tags: {ml_tags}")

        # STEP 1: Update URL's manual tags

        if not manual_tags and ml_tags:
            # No manual tags, but ML tags exist - start from a copy of the ML tags
            logger.info("No manual tags, but ML tags exist - copying ML tags to manual")
            working_tags = list(ml_tags)
        else:
            # Empty or existing manual tags
            working_tags = list(manual_tags) if manual_tags is not None else []

        # Add the new tag if not already present
        if tag not in working_tags:
            working_tags.append(tag)
            delta_url.tdamm_tag_manual = working_tags
            delta_url.save()
            logger.info(f"Updated URL tags to: {working_tags}")

        # STEP 2: Find or update ADD pattern

        existing_patterns = list(
            DeltaTdammTagPattern.objects.filter(
                collection=curated_url.collection,
                match_pattern=curated_url.url,
                match_pattern_type=1,
                operation=OperationChoices.ADD,
            )
        )

        logger.info(f"Found {len(existing_patterns)} existing ADD patterns")

        if existing_patterns:
            # Use the first pattern found
            add_pattern = existing_patterns[0]
            logger.info(f"Updating existing pattern {add_pattern.id}, current tags: {add_pattern.tags}")

            # Union of the pattern's tags and the working tags. dict.fromkeys
            # de-duplicates while keeping first-seen order, so the stored list
            # is deterministic (a set would shuffle it between runs).
            add_pattern.tags = list(dict.fromkeys([*(add_pattern.tags or []), *working_tags]))
            add_pattern.source = source  # Update source to match current request
            add_pattern.save()
            logger.info(f"Updated pattern tags to: {add_pattern.tags}")

            # Delete any other duplicate patterns
            for pattern in existing_patterns[1:]:
                logger.info(f"Deleting duplicate pattern {pattern.id}")
                pattern.delete()
        else:
            # Create a new pattern with all working tags
            logger.info(f"Creating new ADD pattern with tags: {working_tags}")
            add_pattern = DeltaTdammTagPattern.objects.create(
                collection=curated_url.collection,
                match_pattern=curated_url.url,
                match_pattern_type=1,
                operation=OperationChoices.ADD,
                source=source,
                tags=working_tags,
            )
            logger.info(f"Created pattern {add_pattern.id}")

        # STEP 3: Check for and update any REMOVE patterns

        remove_patterns = DeltaTdammTagPattern.objects.filter(
            collection=curated_url.collection,
            match_pattern=curated_url.url,
            match_pattern_type=1,
            operation=OperationChoices.REMOVE,
        )

        for remove_pattern in remove_patterns:
            if remove_pattern.tags and tag in remove_pattern.tags:
                remove_pattern.tags = [t for t in remove_pattern.tags if t != tag]
                if not remove_pattern.tags:
                    logger.info(f"Deleting empty REMOVE pattern {remove_pattern.id}")
                    remove_pattern.delete()
                else:
                    logger.info(f"Updated REMOVE pattern {remove_pattern.id} to {remove_pattern.tags}")
                    remove_pattern.save()

        # Clean up delta if it matches curated
        if not delta_url.to_delete and delta_url._fields_match(curated_url):
            logger.info(f"Deleting delta URL {delta_url.url} as it matches curated URL")
            delta_url.delete()

        return Response({"status": "success"})
    except Exception:
        # API boundary: record the full traceback server-side, but never echo
        # exception text (which can expose internals) back to the client.
        logger.exception("Error occurred while adding a TDAMM tag to a curated URL")
        return Response({"error": "An internal error has occurred."}, status=500)
@action(detail=True, methods=["post"], url_path="remove_tag")
def remove_tag(self, request, pk=None):
    """Remove one TDAMM tag from this URL and reconcile its individual-URL patterns.

    Request data:
        tag: tag code to remove (required; a 400 response is returned without it).
        source: pattern source used when looking up / creating patterns,
            defaults to ``"manual"``.

    Flow:
        1. Strip the tag from every ADD pattern (same collection, URL and source)
           that lists it, deleting any pattern left without tags.
        2. Strip the tag from the delta URL's manual tags.
        3. If an ADD pattern was changed, finish there (deleting the delta URL only
           when it has no manual tags left and matches the curated URL).
        4. Otherwise, when the tag is one of the curated URL's ML tags, record it in
           a REMOVE pattern, then delete the delta URL if it matches the curated URL.

    Returns:
        ``{"status": "success"}`` on success, or a generic error body with status 500.
    """
    curated_url = self.get_object()
    tag = request.data.get("tag")
    source = request.data.get("source", "manual")

    if not tag:
        return Response({"error": "Tag not specified"}, status=400)

    try:
        # Create delta URL first
        delta_url = curated_url._create_or_update_delta()

        manual_tags = delta_url.tdamm_tag_manual or []
        ml_tags = curated_url.tdamm_tag_ml or []

        logger.info(f"Looking for patterns matching URL: {curated_url.url}")

        # There may be duplicate ADD patterns for the same URL, so inspect all of them.
        add_patterns = list(
            DeltaTdammTagPattern.objects.filter(
                collection=curated_url.collection,
                match_pattern=curated_url.url,
                match_pattern_type=1,
                operation=OperationChoices.ADD,
                source=source,
            )
        )

        logger.info(f"Found {len(add_patterns)} ADD patterns")
        for pat in add_patterns:
            logger.info(f"ADD pattern {pat.id}: tags={pat.tags}")

        modified_add_pattern = False
        for add_pattern in add_patterns:
            if not add_pattern.tags or tag not in add_pattern.tags:
                continue

            logger.info(f"Modifying ADD pattern {add_pattern.id}")
            add_pattern.tags = [t for t in add_pattern.tags if t != tag]

            if not add_pattern.tags:
                logger.info(f"Deleting empty ADD pattern {add_pattern.id}")
                add_pattern.delete()
            else:
                logger.info(f"Saving ADD pattern {add_pattern.id} with tags {add_pattern.tags}")
                add_pattern.save()

            modified_add_pattern = True

        # Always update the URL's own tags, whether or not a pattern carried the tag.
        if tag in manual_tags:
            new_tags = [t for t in manual_tags if t != tag]
            # Stored as None rather than an empty list when nothing is left.
            delta_url.tdamm_tag_manual = new_tags if new_tags else None
            delta_url.save()
            logger.info(f"Updated URL tags: {delta_url.tdamm_tag_manual}")

        if modified_add_pattern:
            # The tag came from an ADD pattern, so no REMOVE pattern is needed. The
            # delta is only dropped here when no manual tags remain on it.
            if (
                not delta_url.tdamm_tag_manual
                and not delta_url.to_delete
                and delta_url._fields_match(curated_url)
            ):
                logger.info(f"Deleting delta URL {delta_url.id} as it matches curated URL")
                delta_url.delete()
            return Response({"status": "success"})

        # Only ML-assigned tags need an explicit REMOVE pattern.
        if tag in ml_tags:
            logger.info(f"Creating/updating REMOVE pattern for ML tag {tag}")

            remove_patterns = list(
                DeltaTdammTagPattern.objects.filter(
                    collection=curated_url.collection,
                    match_pattern=curated_url.url,
                    match_pattern_type=1,
                    operation=OperationChoices.REMOVE,
                    source=source,
                )
            )

            logger.info(f"Found {len(remove_patterns)} REMOVE patterns")

            if remove_patterns:
                # Reuse the first REMOVE pattern rather than creating another one.
                remove_pattern = remove_patterns[0]
                current_tags = remove_pattern.tags or []
                if tag not in current_tags:
                    remove_pattern.tags = current_tags + [tag]
                    remove_pattern.save()
                    logger.info(f"Updated REMOVE pattern {remove_pattern.id} with tags {remove_pattern.tags}")
            else:
                remove_pattern = DeltaTdammTagPattern.objects.create(
                    collection=curated_url.collection,
                    match_pattern=curated_url.url,
                    match_pattern_type=1,
                    operation=OperationChoices.REMOVE,
                    source=source,
                    tags=[tag],
                )
                logger.info(f"Created new REMOVE pattern {remove_pattern.id} with tag {tag}")

        # Clean up if delta becomes identical to curated
        if not delta_url.to_delete and delta_url._fields_match(curated_url):
            logger.info(f"Deleting delta URL {delta_url.id} as it matches curated URL")
            delta_url.delete()

        return Response({"status": "success"})
    except Exception:
        # Full details (with traceback) go to the log; the client only receives a
        # generic message so internal exception text is not leaked in the response.
        logger.exception("Error occurred while removing tag")
        return Response({"error": "An internal error has occurred."}, status=500)
class TdammTagPatternViewSet(CollectionFilterMixin, viewsets.ModelViewSet):
    """Model viewset for ``DeltaTdammTagPattern`` objects.

    ``create`` additionally accepts a single ``tag`` value and converts it into
    the ``tags`` list before handing the payload to the serializer.
    """

    queryset = DeltaTdammTagPattern.objects.all()
    serializer_class = TdammTagPatternSerializer

    def create(self, request, *args, **kwargs):
        """Create a pattern, translating a lone ``tag`` field into ``tags``.

        Returns the serialized pattern with HTTP 201; validation errors are raised
        by ``serializer.is_valid(raise_exception=True)``.
        """
        data = request.data.copy()

        if "tag" in data:
            if hasattr(data, "setlist"):
                # Form / multipart posts arrive as a QueryDict: ``pop`` would return
                # the list of values and item assignment would wrap it in yet another
                # list, so move the values across with the list-aware API instead.
                data.setlist("tags", data.getlist("tag"))
                del data["tag"]
            else:
                # Plain dict (e.g. JSON body): wrap the single value in a list.
                data["tags"] = [data.pop("tag")]

        logger.debug("TDAMM tag pattern payload: %s", data)

        serializer = self.get_serializer(data=data)
        serializer.is_valid(raise_exception=True)
        self.perform_create(serializer)
        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
CollectionViewSet(viewsets.ModelViewSet): queryset = Collection.objects.all() serializer_class = CollectionSerializer diff --git a/sde_indexing_helper/static/css/delta_url_list.css b/sde_indexing_helper/static/css/delta_url_list.css index 591bc070..ed2706b1 100644 --- a/sde_indexing_helper/static/css/delta_url_list.css +++ b/sde_indexing_helper/static/css/delta_url_list.css @@ -447,6 +447,138 @@ div.dt-container div.dt-paging ul.pagination { min-width: 100%; } +.table_filter_row_input { + width: 100%; +} + +.tdamm-tags-container { + display: flex; + flex-wrap: wrap; + gap: 4px; + justify-content: center; +} + +.tdamm-tag { + display: inline-flex; + align-items: center; + background: #304050; + border-radius: 16px; + padding: 2px 8px; + margin: 2px; + color: white; + font-size: 0.9em; + position: relative; + cursor: help; +} + +.tdamm-tag::after { + content: attr(data-full-name); + position: absolute; + bottom: 100%; + left: 50%; + transform: translateX(-50%); + padding: 6px 12px; + background: #15232E; + border: 1px solid #A7BACD; + border-radius: 4px; + font-size: 0.85em; + white-space: nowrap; + opacity: 0; + visibility: hidden; + transition: opacity 0.2s, visibility 0.2s; + z-index: 1000; +} + +.tdamm-tag:hover::after { + opacity: 1; + visibility: visible; +} + +.tdamm-tags-container[data-source="manual"] .tdamm-tag { + background: #31a728; +} + +.tdamm-tags-container[data-source="ml"] .tdamm-tag { + background: #00a2ca; +} + +.tdamm-tags-container[data-source="Not Set"] .tdamm-tag { + background: #304050; +} + +.delete-tag { + background: none; + border: none; + color: #ff9999; + margin-left: 4px; + padding: 0 4px; + cursor: pointer; + font-size: 1.2em; + line-height: 1; +} + +.delete-tag:hover { + color: #ff0000; +} + +.tdamm-dropdown { + position: absolute; + min-width: 300px; + background: #15232E; + border: 2px solid red !important; + border-radius: 4px; + box-shadow: 0 2px 10px rgba(0,0,0,0.3); + z-index: 9999; +} + +.tdamm-dropdown .dropdown-menu { + 
position: static !important; + display: block !important; + min-width: 100%; + max-height: 400px; + overflow-y: auto; + padding: 10px; + background: none; + border: none; + margin: 0; +} + +.tdamm-options { + min-height: 50px; + background: rgba(255,255,255,0.1); +} + +.tdamm-option:hover { + background: #0066CA; +} + +.add-tdamm-tag { + margin-left: 8px; + padding: 2px 8px; + font-size: 0.9em; +} + +.tdamm-dropdown-container { + position: absolute; + background: #15232E; + border: 1px solid #A7BACD; + border-radius: 4px; + box-shadow: 0 2px 10px rgba(0,0,0,0.3); + min-width: 300px; +} + +.tdamm-options-list { + max-height: 400px; + overflow-y: auto; +} + +.tdamm-option { + padding: 8px 12px; + cursor: pointer; + color: white; + transition: background-color 0.2s; +} + #delta_urls_table_wrapper .col-md { display: flex; justify-content: space-between; diff --git a/sde_indexing_helper/static/js/delta_url_list.js b/sde_indexing_helper/static/js/delta_url_list.js index c961a981..a59f4ca2 100644 --- a/sde_indexing_helper/static/js/delta_url_list.js +++ b/sde_indexing_helper/static/js/delta_url_list.js @@ -9,6 +9,7 @@ var newExcludePatternsCount = 0; var newTitlePatternsCount = 0; var newDocumentTypePatternsCount = 0; var newDivisionPatternsCount = 0; +var newTdammTagPatternsCount = 0; var currentTab = ""; //blank for the first tab var matchPatternTypeMap = { "Individual URL Pattern": 1, @@ -258,6 +259,7 @@ function initializeDataTable() { getScrapedTitleColumn(), getGeneratedTitleColumn(), getDocumentTypeColumn(), + getTdammTagColumn(), getDivisionColumn(), { data: "id", visible: false, searchable: false }, { data: "exclude_pattern_type", visible: false, searchable: false }, @@ -266,6 +268,7 @@ function initializeDataTable() { { data: "match_pattern_type", visible: false, searchable: false }, { data: "delta_urls_count", visible: false, searchable: false }, { data: "excluded", visible: false, searchable: false }, + { data: "tag_source", visible: false, searchable: false 
}, { data: null, render: function (data, type, row) { @@ -476,12 +479,14 @@ function initializeDataTable() { getCuratedScrapedTitleColumn(), getCuratedGeneratedTitleColumn(), getCuratedDocumentTypeColumn(), + getTdammTagColumn(), getCuratedDivisionColumn(), { data: "id", visible: false, searchable: false }, { data: "generated_title_id", visible: false, searchable: false }, { data: "match_pattern_type", visible: false, searchable: false }, { data: "curated_urls_count", visible: false, searchable: false }, { data: "excluded", visible: false, searchable: false }, + { data: "tag_source", visible: false, searchable: false }, { data: null, render: function (data, type, row) { @@ -802,6 +807,10 @@ function initializeDataTable() { title_patterns_table.columns(2).search(this.value).draw(); }); + $("#deltaTdammTagFilter").on("beforeinput", DataTable.util.debounce(function (val) { + delta_urls_table.column('tdamm_tag:name').search(this.value).draw(); + }, 1000)); + var document_type_patterns_table = $( "#document_type_patterns_table" ).DataTable({ @@ -966,6 +975,291 @@ $("#deltaDivisionMatchPatternFilter").on("beforeinput", function (val) { division_patterns_table.columns(0).search(this.value).draw(); }); +var tdamm_tag_patterns_table = $("#tdamm_tag_patterns_table").DataTable({ + dom: "lBrtip", + buttons: [ + { + text: "Add Pattern", + className: "addPattern", + action: function () { + $modal = $("#tdammTagPatternModal").modal(); + } + }, + { + text: "Customize Columns", + className: "customizeColumns", + action: function () { + modalContents("#tdamm_tag_patterns_table"); + } + }, + ], + lengthMenu: [ + [25, 50, 100, 500], + ["Show 25", "Show 50", "Show 100", "Show 500"], + ], + orderCellsTop: true, + pageLength: 100, + ajax: `/api/tdamm-tag-patterns/?format=datatables&collection_id=${collection_id}`, + initComplete: function (data) { + this.api() + .columns() + .every(function (index) { + var table = $("#tdamm_tag_patterns_table").DataTable(); + + let addDropdownSelect = { 
+ 1: { + columnToSearch: 9, + matchPattern: { + "Individual URL Pattern": 1, + "Multi-URL Pattern": 2, + } + }, + 2: { + columnToSearch: 10, + matchPattern: {} + }, + 3: { + columnToSearch: 11, + matchPattern: { + "Add Tag": 1, + "Remove Tag": 2 + } + }, + // 4: { + // columnToSearch: 12, + // matchPattern: { + // "manual": "manual", + // "ml": "ml" + // } + // } + }; + + // Populate TDAMM tag mappings + tdamm_choices.forEach(choice => { + addDropdownSelect[2].matchPattern[choice.display] = choice.code; + }); + + let column = this; + if (column.data().length === 0) { + $(`#tdamm-patterns-dropdown-${index}`).prop("disabled", true); + } else if (index in addDropdownSelect) { + $("#tdamm-patterns-dropdown-" + index).on("change", function () { + let col = addDropdownSelect[index].columnToSearch; + let searchInput = addDropdownSelect[index].matchPattern[$(this).val()]; + if ($(this).val() === "" || $(this).val() === undefined) + table.columns(col).search("").draw(); + else { + table.columns(col).search(searchInput).draw(); + } + }); + } + }); + }, + + columns: [ + { data: "match_pattern", class: "whiteText" }, + { + data: "match_pattern_type_display", + class: "text-center whiteText", + sortable: false, + }, + { + data: "tags_display", + class: "whiteText", + render: function(data, type, row) { + if (!data || !data.length) return "None"; + return Array.isArray(data) ? 
data.join(", ") : data; + } + }, + { + data: "operation_display", + class: "whiteText" + }, + // { + // data: "source", + // class: "whiteText" + // }, + { + data: "delta_urls_count", + class: "text-center whiteText", + sortable: true, + }, + { + data: "curated_urls_count", + class: "text-center whiteText", + sortable: true, + }, + { + data: null, + sortable: false, + class: "text-center", + render: function (data, type, row) { + return ``; + }, + }, + { data: "id", visible: false, searchable: false }, + { data: "match_pattern_type", visible: false }, + { data: "tags", visible: false }, + { data: "operation", visible: false }, + // { data: "source", visible: false }, + ], +}); + +$("#tdammTagMatchPatternFilter").on("beforeinput", function (val) { + tdamm_tag_patterns_table.columns(0).search(this.value).draw(); +}); + +// Handle form submission for TDAMM tag pattern +$("#tdamm_tag_pattern_form").on("submit", function (e) { + e.preventDefault(); + inputs = {}; + input_serialized = $(this).serializeArray(); + input_serialized.forEach((field) => { + inputs[field.name] = field.value; + }); + + postTdammTagPattern( + inputs.match_pattern, + inputs.match_pattern_type, + inputs.tdamm_tag, + inputs.tdamm_operation, + // inputs.tdamm_source + "manual" + ); + + // Close the modal + $("#tdammTagPatternModal").modal("hide"); +}); + +// Handle TDAMM tag form dropdown selections +$(".tdamm_tag_form_select").on("click", function (e) { + e.preventDefault(); + $('input[name="tdamm_tag"]').val($(this).attr("value")); + $(".tdamm-tag-dropdown").text($(this).text()); +}); + +$(".tdamm_operation_form_select").on("click", function (e) { + e.preventDefault(); + $('input[name="tdamm_operation"]').val($(this).attr("value")); + $(".operation-dropdown").text($(this).text()); +}); + +// $(".tdamm_source_form_select").on("click", function (e) { +// e.preventDefault(); +// $('input[name="tdamm_source"]').val($(this).attr("value")); +// $(".source-dropdown").text($(this).text()); +// }); + +// 
/**
 * Wire up the delete buttons of the TDAMM tag patterns table.
 *
 * Uses a delegated handler on <body> so it also covers buttons that are
 * rendered after this function runs. The pattern id is read from the
 * button's `data-row-id` attribute.
 */
function handleDeleteTdammTagPatternButtonClick() {
  $("body").on("click", ".delete-tdamm-tag-pattern-button", function () {
    const patternUrl = `/api/tdamm-tag-patterns/${$(this).data("row-id")}/`;

    // Keep the shared "URL pending deletion" variable in sync before delegating.
    currentURLtoDelete = patternUrl;
    deletePattern(patternUrl, "TDAMM Tag Pattern");
  });
}
/**
 * Create a TDAMM tag pattern for the current collection.
 *
 * The payload is sent as FormData with a single `tag` field; the API view
 * turns that into the `tags` list.
 *
 * @param {string} match_pattern - URL pattern to match (required).
 * @param {string|number} match_pattern_type - pattern type value from the form.
 * @param {string} tag - TDAMM tag code (required).
 * @param {string|number} operation - add/remove operation value (required).
 * @param {string} [source="manual"] - pattern source.
 */
function postTdammTagPattern(match_pattern, match_pattern_type, tag, operation, source = "manual") {
  // Reject incomplete submissions before hitting the API.
  if (!match_pattern) {
    toastr.error("Please provide a match pattern.");
    return;
  }
  if (!tag) {
    toastr.error("Please select a TDAMM tag.");
    return;
  }
  if (!operation) {
    toastr.error("Please select an operation (Add or Remove).");
    return;
  }

  var formData = new FormData();
  formData.append("collection", collection_id);
  formData.append("match_pattern", match_pattern);
  formData.append("match_pattern_type", match_pattern_type);
  formData.append("operation", operation);
  formData.append("source", source);
  formData.append("csrfmiddlewaretoken", csrftoken);
  // Sent as a single value; the server converts it to an array.
  formData.append("tag", tag);

  $.ajax({
    url: "/api/tdamm-tag-patterns/",
    type: "POST",
    data: formData,
    // Both flags are required so jQuery passes the FormData through untouched.
    processData: false,
    contentType: false,
    success: function () {
      // Refresh both tables in place (keep the current page) so the new pattern
      // and its effect on the URLs are visible immediately.
      $("#delta_urls_table").DataTable().ajax.reload(null, false);
      $("#tdamm_tag_patterns_table").DataTable().ajax.reload(null, false);
      // NOTE(review): an earlier version also bumped a "N new" badge on
      // #tdammTagPatternsTab; its markup is not visible here, so it is not restored.
    },
    error: function (xhr) {
      var errorMessage = xhr.responseText || "";
      console.error("Error:", errorMessage);
      if (errorMessage.includes("unique")) {
        // The pattern is already there, so treat the duplicate as a no-op.
        toastr.success("Pattern already exists");
        return;
      }
      toastr.error(errorMessage || "Could not save the TDAMM tag pattern.");
    },
  });
}
{ + data: "tdamm_tag", + width: "10%", + visible: (has_tdamm_tags === 'true'), + className: "text-center", + render: function(data, type, row) { + if (!data || !data.length) { + return `
+ +
`; + } + // console.log('Row data for tags:', { + // data, + // tagSource: row.tag_source, + // fullRow: row + // }); + + const tagSource = row.tag_source; + + const tags = data.map(tag => { + const fullName = tdamm_choices.find(choice => choice.code === tag)?.label || tag; + return ` +
+ ${tag} + +
+ `; + }).join(''); + + return `
+ ${tags} + +
`; + } + }; +} + function getDocumentTypeColumn() { return { data: "document_type", @@ -1398,6 +1748,7 @@ function handleHideorShowKeypress() { addEnterEscapeKeypress("#titlePatternModal", "#title_pattern_form"); addEnterEscapeKeypress("#documentTypePatternModal", "#document_type_pattern_form"); addEnterEscapeKeypress("#divisionPatternModal", "#division_pattern_form"); + addEnterEscapeKeypress("#tdammTagPatternModal", "#tdamm_tag_pattern_form"); } @@ -1969,6 +2320,11 @@ function division_pattern_form(selected_text) { $modal.find("#division_match_pattern_input").val(selected_text); // Updated to match the HTML ID } +function tdamm_tag_pattern_form(selected_text) { + $modal = $("#tdammTagPatternModal").modal(); + $modal.find("#tdamm_match_pattern_input").val(selected_text); +} + // If the menu element is clicked $(".custom-menu li").click(function () { // This is the triggered action name @@ -1988,6 +2344,9 @@ $(".custom-menu li").click(function () { case "division-pattern": division_pattern_form(selected_text.trim()); break; + case "tdamm-tag-pattern": + tdamm_tag_pattern_form(selected_text.trim()); + break; } // Hide it AFTER the action was triggered @@ -2274,3 +2633,167 @@ function handleReindexingStatusSelect() { }); }); } + +function getApiEndpoint($element) { + return $element.closest('#curated_urls_table').length > 0 ? 
'curated-urls' : 'delta-urls'; +} + +function handleTagDeletion() { + let currentTagData = null; + let $clickedButton = null; + + $("body").on("click", ".delete-tag", function(e) { + e.preventDefault(); + e.stopPropagation(); + + $clickedButton = $(this); + const urlId = $clickedButton.data("url-id"); + const tagToDelete = $clickedButton.data("tag"); + const source = $clickedButton.data("source"); + + currentTagData = { urlId, tagToDelete, source }; + + // console.log('Tag deletion data:', { + // urlId, + // tagToDelete, + // source, + // buttonData: $button.data() + // }); + + // Confirm deletion + $("#deleteTagModal").modal(); + $(".delete-tag-caption").text(`Are you sure you want to remove the tag "${tagToDelete}"?`); + }); + + $("#deleteTagModalForm").on("click", "button", function(event) { + event.preventDefault(); + const buttonId = $(this).attr("id"); + + if (buttonId === "dontDeleteTag") { + $("#deleteTagModal").modal("hide"); + return; + } + + if (buttonId === "deleteTag" && currentTagData) { + const { urlId, tagToDelete, source } = currentTagData; + const apiEndpoint = getApiEndpoint($clickedButton); + + $.ajax({ + url: `/api/${apiEndpoint}/${urlId}/remove_tag/`, + type: "POST", + data: { + tag: tagToDelete, + source: source, + csrfmiddlewaretoken: csrftoken + }, + success: function(response) { + $("#deleteTagModal").modal("hide"); + $("#delta_urls_table").DataTable().ajax.reload(null, false); + $("#curated_urls_table").DataTable().ajax.reload(null, false); + toastr.success("Tag removed successfully"); + }, + error: function(xhr, status, error) { + toastr.error("Error removing tag: " + error); + } + }); + } + }); +} + +function handleTagAddition() { + let activeDropdown = null; + + function createDropdownContent() { + return tdamm_choices.map(choice => + `
+ ${choice.display} +
` + ).join(''); + } + + function hideDropdown(dropdown) { + if (dropdown) { + dropdown.remove(); + activeDropdown = null; + } + } + + $('body').on('click', '.add-tdamm-tag', function(e) { + // console.log("Add tag button clicked"); + e.preventDefault(); + e.stopPropagation(); + + if (activeDropdown) { + hideDropdown(activeDropdown); + } + + const $button = $(this); + const urlId = $button.data('url-id'); + const source = $button.data('source'); + + // console.log("Button data:", { urlId, source }); + + // Clone dropdown template + const dropdown = $('#tdammDropdownTemplate').children().first().clone(); + // console.log("Cloned template:", dropdown.html()); + + const optionsList = dropdown.find('.tdamm-options-list'); + optionsList.html(createDropdownContent()); + + // Position dropdown + dropdown.css({ + position: 'absolute', + top: $button.offset().top + $button.outerHeight() + 5, + left: $button.offset().left, + display: 'block', + width: '300px', + // zIndex: 1000 + zIndex: 9999 + // background: 'red', + // border: '2px solid yellow' + }); + + optionsList.on('click', '.tdamm-option', function() { + const selectedTag = $(this).data('value'); + const apiEndpoint = getApiEndpoint($button); + + $.ajax({ + url: `/api/${apiEndpoint}/${urlId}/add_tag/`, + type: 'POST', + data: { + tag: selectedTag, + source: source, + csrfmiddlewaretoken: csrftoken + }, + success: function(response) { + $("#delta_urls_table").DataTable().ajax.reload(null, false); + $("#curated_urls_table").DataTable().ajax.reload(null, false); + toastr.success("Tag added successfully"); + hideDropdown(dropdown[0]); + }, + error: function(xhr, status, error) { + toastr.error("Error adding tag: " + error); + } + }); + }); + + if ($button.offset().left + 300 > $(window).width()) { + dropdown.css({ + left: 'auto', + right: $(window).width() - ($button.offset().left + $button.outerWidth()) + }); + } + + // Add to document and store reference + $('body').append(dropdown); + // console.log("Dropdown appended 
to body:", dropdown); + activeDropdown = dropdown[0]; + }); + + // Close dropdown when clicking outside + $(document).on('click', function(e) { + if (activeDropdown && !$(e.target).closest('.tdamm-dropdown').length) { + hideDropdown(activeDropdown); + } + }); +} diff --git a/sde_indexing_helper/templates/sde_collections/delta_urls_list.html b/sde_indexing_helper/templates/sde_collections/delta_urls_list.html index 08ae74ef..b98ad328 100644 --- a/sde_indexing_helper/templates/sde_collections/delta_urls_list.html +++ b/sde_indexing_helper/templates/sde_collections/delta_urls_list.html @@ -98,6 +98,9 @@

Division Patterns {% endif %} + @@ -112,6 +115,7 @@

Scraped Title
New Title
Document Type
+
TDAMM Tags
Division
ID
@@ -145,6 +149,14 @@

+ + + + + + + + + + + + + + + + + @@ -422,6 +487,7 @@

  • Create Title Pattern
  • Create Document Type Pattern
  • Create Division Pattern
  • +
  • Create TDAMM Tag Pattern
  • - + + + + + {% endblock content %} {% block javascripts %} {{ block.super }} + + {{ tdamm_choices|json_script:"tdamm-choices-data" }} +