@@ -40,6 +40,38 @@ class MatchPatternTypeChoices(models.IntegerChoices):
4040 related_name = "%(class)ss" , # Makes curated_url.deltaincludepatterns.all()
4141 )
4242
def get_url_match_count(self) -> int:
    """
    Count the distinct URL strings this pattern matches.

    URLs are gathered from both the matching delta URLs and the matching
    curated URLs; a URL string present in both is counted once.

    Returns:
        Number of unique ``url`` values across both querysets.
    """
    delta_urls = self.get_matching_delta_urls().values_list("url", flat=True)
    curated_urls = self.get_matching_curated_urls().values_list("url", flat=True)
    # set.union() accepts any iterable, so the curated URLs are streamed
    # straight into the result instead of being copied into a second set.
    return len(set(delta_urls).union(curated_urls))
50+
def is_most_distinctive_pattern(self, url) -> bool:
    """
    Decide whether this pattern should be applied to ``url``.

    The pattern is considered the most distinctive one when no other pattern
    of the same class, in the same collection, and related to this URL through
    its ``delta_urls`` or ``curated_urls`` relations matches strictly fewer
    URLs overall. Ties are not broken: patterns with equal match counts all
    return True for the same URL.

    Args:
        url: Object exposing a ``.url`` attribute (callers visible here pass
            curated and delta URL instances).

    Returns:
        True if this pattern should be applied, False if a competing pattern
        with a smaller match count exists.
    """
    # Only patterns of the same concrete class compete with each other.
    pattern_class = self.__class__
    competing_patterns = list(
        pattern_class.objects.filter(collection=self.collection)
        .filter(models.Q(delta_urls__url=url.url) | models.Q(curated_urls__url=url.url))
        .exclude(id=self.id)
        .distinct()
    )

    # Without competitors this pattern wins by default; return before paying
    # for our own match count, which evaluates two querysets.
    if not competing_patterns:
        return True

    my_match_count = self.get_url_match_count()

    # Any competitor with a strictly smaller URL set is more specific.
    return not any(
        pattern.get_url_match_count() < my_match_count
        for pattern in competing_patterns
    )
74+
4375 def get_regex_pattern (self ) -> str :
4476 """Convert the match pattern into a proper regex based on pattern type."""
4577 escaped_pattern = re .escape (self .match_pattern )
@@ -240,6 +272,9 @@ def apply(self) -> None:
240272
241273 # Create DeltaUrls only where field value would change
242274 for curated_url in previously_unaffected_curated :
275+ if not self .is_most_distinctive_pattern (curated_url ):
276+ continue
277+
243278 if (
244279 getattr (curated_url , field ) == new_value
245280 or DeltaUrl .objects .filter (url = curated_url .url , collection = self .collection ).exists ()
@@ -257,8 +292,13 @@ def apply(self) -> None:
257292
258293 DeltaUrl .objects .create (** fields )
259294
260- # Update all matching DeltaUrls with the new field value
261- self .get_matching_delta_urls ().update (** {field : new_value })
295+ # Update all matching DeltaUrls with the new field value if this is the most distinctive pattern
296+ for delta_url in self .get_matching_delta_urls ():
297+ if self .is_most_distinctive_pattern (delta_url ):
298+ setattr (delta_url , field , new_value )
299+ delta_url .save ()
300+
301+ # Update pattern relationships
262302 self .update_affected_delta_urls_list ()
263303
264304 def unapply (self ) -> None :
@@ -388,35 +428,6 @@ def generate_title_for_url(self, url_obj) -> tuple[str, str | None]:
388428 except (ValueError , ValidationError ) as e :
389429 return None , str (e )
390430
391- def get_url_match_count (self ):
392- """
393- Get the number of unique URLs this pattern matches across both delta and curated URLs.
394- """
395- delta_urls = set (self .get_matching_delta_urls ().values_list ("url" , flat = True ))
396- curated_urls = set (self .get_matching_curated_urls ().values_list ("url" , flat = True ))
397- return len (delta_urls .union (curated_urls ))
398-
399- def is_most_distinctive_pattern (self , url ) -> bool :
400- """
401- Determine if this pattern should apply to a URL by checking if it matches
402- the smallest number of URLs among all patterns that match this URL.
403- Returns True if this pattern should be applied.
404- """
405- my_match_count = self .get_url_match_count ()
406-
407- # Get all patterns that match this URL based on match_pattern regex
408- matching_patterns = DeltaTitlePattern .objects .filter (collection = self .collection ).exclude (
409- id = self .id
410- ) # Exclude self to avoid duplicate counting
411-
412- # Filter to only patterns that would match this URL and get their counts
413- for pattern in matching_patterns :
414- if re .match (pattern .get_regex_pattern (), url .url ):
415- if pattern .get_url_match_count () < my_match_count :
416- return False
417-
418- return True
419-
420431 def apply (self ) -> None :
421432 """
422433 Apply the title pattern to matching URLs:
@@ -491,7 +502,6 @@ def apply(self) -> None:
491502
492503 # Update pattern relationships
493504 self .update_affected_delta_urls_list ()
494- self .update_affected_curated_urls_list ()
495505
496506 def unapply (self ) -> None :
497507 """
0 commit comments