feat: Add genre exclusion UI, store excluded genres in user settings, and apply them during catalog generation (#21)

TimilsinaBimal · web-flow · commit c7edd8975813 · 2025-12-06T15:01:00.000+05:45
* feat: Add genre exclusion UI, store excluded genres in user settings, and apply them during catalog generation.

* refactor: simplify filtering recommendations by excluded genres using list comprehension

* refactor: streamline genre exclusion logic for similarity recommendations
diff --git a/app/api/endpoints/tokens.py b/app/api/endpoints/tokens.py
@@ -21,6 +21,8 @@ class TokenRequest(BaseModel):
     catalogs: list[CatalogConfig] | None = Field(default=None, description="Optional catalog configuration")
     language: str = Field(default="en-US", description="Language for TMDB API")
     rpdb_key: str | None = Field(default=None, description="Optional RPDB API Key")
+    excluded_movie_genres: list[str] = Field(default_factory=list, description="List of movie genre IDs to exclude")
+    excluded_series_genres: list[str] = Field(default_factory=list, description="List of series genre IDs to exclude")
 
 
 class TokenResponse(BaseModel):
@@ -130,6 +132,8 @@ async def create_token(payload: TokenRequest, request: Request) -> TokenResponse
         language=payload.language or default_settings.language,
         catalogs=payload.catalogs if payload.catalogs else default_settings.catalogs,
         rpdb_key=rpdb_key,
+        excluded_movie_genres=payload.excluded_movie_genres,
+        excluded_series_genres=payload.excluded_series_genres,
     )
 
     # encode_settings now includes the "settings:" prefix
diff --git a/app/core/settings.py b/app/core/settings.py
@@ -14,6 +14,8 @@ class UserSettings(BaseModel):
     catalogs: list[CatalogConfig]
     language: str = "en-US"
     rpdb_key: str | None = None
+    excluded_movie_genres: list[str] = []
+    excluded_series_genres: list[str] = []
 
 
 def encode_settings(settings: UserSettings) -> str:
diff --git a/app/services/catalog.py b/app/services/catalog.py
@@ -45,7 +45,9 @@ def build_catalog_entry(self, item, label, config_id):
             "extra": [],
         }
 
-    async def get_theme_based_catalogs(self, library_items: list[dict]) -> list[dict]:
+    async def get_theme_based_catalogs(
+        self, library_items: list[dict], user_settings: UserSettings | None = None
+    ) -> list[dict]:
         catalogs = []
         # 1. Build User Profile
         # Combine loved and watched
@@ -65,16 +67,27 @@ async def get_theme_based_catalogs(self, library_items: list[dict]) -> list[dict
             scored_obj = self.scoring_service.process_item(item_data)
             scored_objects.append(scored_obj)
 
+        # Get excluded genres
+        excluded_movie_genres = []
+        excluded_series_genres = []
+        if user_settings:
+            excluded_movie_genres = [int(g) for g in user_settings.excluded_movie_genres]
+            excluded_series_genres = [int(g) for g in user_settings.excluded_series_genres]
+
         # 2. Generate Thematic Rows with Type-Specific Profiles
         # Generate for Movies
-        movie_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type="movie")
+        movie_profile = await self.user_profile_service.build_user_profile(
+            scored_objects, content_type="movie", excluded_genres=excluded_movie_genres
+        )
         movie_rows = await self.row_generator.generate_rows(movie_profile, "movie")
 
         for row in movie_rows:
             catalogs.append({"type": "movie", "id": row.id, "name": row.title, "extra": []})
 
         # Generate for Series
-        series_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type="series")
+        series_profile = await self.user_profile_service.build_user_profile(
+            scored_objects, content_type="series", excluded_genres=excluded_series_genres
+        )
         series_rows = await self.row_generator.generate_rows(series_profile, "series")
 
         for row in series_rows:
@@ -98,7 +111,7 @@ async def get_dynamic_catalogs(
         catalogs = []
 
         if include_theme_based_rows:
-            catalogs.extend(await self.get_theme_based_catalogs(library_items))
+            catalogs.extend(await self.get_theme_based_catalogs(library_items, user_settings))
 
         # 3. Add Item-Based Rows
         if include_item_based_rows:
diff --git a/app/services/discovery.py b/app/services/discovery.py
@@ -14,7 +14,11 @@ def __init__(self):
         self.tmdb_service = TMDBService()
 
     async def discover_recommendations(
-        self, profile: UserTasteProfile, content_type: str, limit: int = 20
+        self,
+        profile: UserTasteProfile,
+        content_type: str,
+        limit: int = 20,
+        excluded_genres: list[int] | None = None,
     ) -> list[dict]:
         """
         Find content that matches the user's taste profile.
@@ -33,52 +37,78 @@ async def discover_recommendations(
         top_crew = profile.get_top_crew(limit=1)  # e.g. [(555, 1.0)] - Director
 
         top_countries = profile.get_top_countries(limit=2)
+        top_year = profile.get_top_year(limit=1)
 
         if not top_genres and not top_keywords and not top_cast:
             # Fallback if profile is empty
             return []
 
         tasks = []
+        base_params = {}
+        if excluded_genres:
+            base_params["without_genres"] = "|".join([str(g) for g in excluded_genres])
 
         # Query 1: Top Genres Mix
         if top_genres:
             genre_ids = "|".join([str(g[0]) for g in top_genres])
-            params_popular = {"with_genres": genre_ids, "sort_by": "popularity.desc", "vote_count.gte": 100}
+            params_popular = {
+                "with_genres": genre_ids,
+                "sort_by": "popularity.desc",
+                "vote_count.gte": 500,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_popular))
 
             # fetch atleast two pages of results
             for i in range(2):
                 params_rating = {
                     "with_genres": genre_ids,
                     "sort_by": "ratings.desc",
-                    "vote_count.gte": 300,
+                    "vote_count.gte": 500,
                     "page": i + 1,
+                    **base_params,
                 }
                 tasks.append(self._fetch_discovery(content_type, params_rating))
 
         # Query 2: Top Keywords
         if top_keywords:
             keyword_ids = "|".join([str(k[0]) for k in top_keywords])
-            params_keywords = {"with_keywords": keyword_ids, "sort_by": "popularity.desc"}
+            params_keywords = {
+                "with_keywords": keyword_ids,
+                "sort_by": "popularity.desc",
+                "vote_count.gte": 500,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_keywords))
 
             # fetch atleast two pages of results
             for i in range(3):
                 params_rating = {
                     "with_keywords": keyword_ids,
                     "sort_by": "ratings.desc",
-                    "vote_count.gte": 300,
+                    "vote_count.gte": 500,
                     "page": i + 1,
+                    **base_params,
                 }
                 tasks.append(self._fetch_discovery(content_type, params_rating))
 
         # Query 3: Top Actors
         for actor in top_cast:
             actor_id = actor[0]
-            params_actor = {"with_cast": str(actor_id), "sort_by": "popularity.desc"}
+            params_actor = {
+                "with_cast": str(actor_id),
+                "sort_by": "popularity.desc",
+                "vote_count.gte": 500,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_actor))
 
-            params_rating = {"with_cast": str(actor_id), "sort_by": "ratings.desc", "vote_count.gte": 300}
+            params_rating = {
+                "with_cast": str(actor_id),
+                "sort_by": "ratings.desc",
+                "vote_count.gte": 500,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_rating))
 
         # Query 4: Top Director
@@ -87,19 +117,47 @@ async def discover_recommendations(
             params_director = {
                 "with_crew": str(director_id),
                 "sort_by": "vote_average.desc",  # Directors imply quality preference
+                "vote_count.gte": 500,
+                **base_params,
             }
             tasks.append(self._fetch_discovery(content_type, params_director))
 
-            params_rating = {"with_crew": str(director_id), "sort_by": "ratings.desc", "vote_count.gte": 300}
+            params_rating = {
+                "with_crew": str(director_id),
+                "sort_by": "ratings.desc",
+                "vote_count.gte": 500,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_rating))
 
         # Query 5: Top Countries
         if top_countries:
             country_ids = "|".join([str(c[0]) for c in top_countries])
-            params_country = {"with_origin_country": country_ids, "sort_by": "popularity.desc", "vote_count.gte": 100}
+            params_country = {
+                "with_origin_country": country_ids,
+                "sort_by": "popularity.desc",
+                "vote_count.gte": 100,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_country))
 
-            params_rating = {"with_origin_country": country_ids, "sort_by": "ratings.desc", "vote_count.gte": 300}
+            params_rating = {
+                "with_origin_country": country_ids,
+                "sort_by": "ratings.desc",
+                "vote_count.gte": 300,
+                **base_params,
+            }
+            tasks.append(self._fetch_discovery(content_type, params_rating))
+
+        # Query 6: Top Year
+        if top_year:
+            year = top_year[0][0]
+            params_rating = {
+                "year": year,
+                "sort_by": "ratings.desc",
+                "vote_count.gte": 500,
+                **base_params,
+            }
             tasks.append(self._fetch_discovery(content_type, params_rating))
 
         # 3. Execute Parallel Queries
diff --git a/app/services/recommendation_service.py b/app/services/recommendation_service.py
@@ -254,6 +254,15 @@ async def get_recommendations_for_item(self, item_id: str) -> list[dict]:
         # 1. Filter by TMDB ID
         recommendations = await self._filter_candidates(recommendations, watched_imdb, watched_tmdb)
 
+        # 1.5 Filter by Excluded Genres
+        # We need to detect content_type from item_id or media_type to know which exclusion list to use.
+        # media_type is already resolved above.
+        excluded_ids = set(self._get_excluded_genre_ids(media_type))
+        if excluded_ids:
+            recommendations = [
+                item for item in recommendations if not excluded_ids.intersection(item.get("genre_ids") or [])
+            ]
+
         # 2. Fetch Metadata (gets IMDB IDs)
         meta_items = await self._fetch_metadata_for_items(recommendations, media_type)
 
@@ -278,6 +287,15 @@ async def get_recommendations_for_item(self, item_id: str) -> list[dict]:
         logger.info(f"Found {len(final_items)} valid recommendations for {item_id}")
         return final_items
 
+    def _get_excluded_genre_ids(self, content_type: str) -> list[int]:
+        if not self.user_settings:
+            return []
+        if content_type == "movie":
+            return [int(g) for g in self.user_settings.excluded_movie_genres]
+        elif content_type in ["series", "tv"]:
+            return [int(g) for g in self.user_settings.excluded_series_genres]
+        return []
+
     async def get_recommendations_for_theme(self, theme_id: str, content_type: str, limit: int = 20) -> list[dict]:
         """
         Parse a dynamic theme ID and fetch recommendations.
@@ -315,6 +333,16 @@ async def get_recommendations_for_theme(self, theme_id: str, content_type: str,
         if "sort_by" not in params:
             params["sort_by"] = "popularity.desc"
 
+        # Apply Excluded Genres
+        excluded_ids = self._get_excluded_genre_ids(content_type)
+        if excluded_ids:
+            # NOTE: `without_genres` is applied even when the theme itself specifies `with_genres`,
+            # because the user explicitly asked to exclude those genres.
+            # If the theme IS an excluded genre, TMDB might return 0 results for it.
+            # However, RowGenerator safeguards against generating themes for excluded genres,
+            # so applying the filter here is safe for keyword/country rows.
+            params["without_genres"] = "|".join(str(g) for g in excluded_ids)
+
         # Fetch
         recommendations = await self.tmdb_service.get_discover(content_type, **params)
         candidates = recommendations.get("results", [])
@@ -407,15 +435,25 @@ async def get_recommendations(
             tasks_a.append(self._fetch_recommendations_from_tmdb(source.get("_id"), source.get("type"), limit=10))
         similarity_candidates = []
         similarity_recommendations = await asyncio.gather(*tasks_a, return_exceptions=True)
+
+        excluded_ids = set(self._get_excluded_genre_ids(content_type))
+
         similarity_recommendations = [item for item in similarity_recommendations if not isinstance(item, Exception)]
-        for item in similarity_recommendations:
-            similarity_candidates.extend(item)
+        for batch in similarity_recommendations:
+            similarity_candidates.extend(
+                item for item in batch if not excluded_ids.intersection(item.get("genre_ids") or [])
+            )
 
         # --- Candidate Set B: Profile-based Discovery ---
+        # Extract excluded genres
+        excluded_genres = list(excluded_ids)  # Convert back to list for consistency
+
         # Use typed profile based on content_type
-        user_profile = await self.user_profile_service.build_user_profile(scored_objects, content_type=content_type)
+        user_profile = await self.user_profile_service.build_user_profile(
+            scored_objects, content_type=content_type, excluded_genres=excluded_genres
+        )
         discovery_candidates = await self.discovery_engine.discover_recommendations(
-            user_profile, content_type, limit=20
+            user_profile, content_type, limit=20, excluded_genres=excluded_genres
         )
 
         # --- Combine & Deduplicate ---
diff --git a/app/services/user_profile.py b/app/services/user_profile.py
@@ -38,7 +38,10 @@ def __init__(self):
         self.tmdb_service = TMDBService()
 
     async def build_user_profile(
-        self, scored_items: list[ScoredItem], content_type: str | None = None
+        self,
+        scored_items: list[ScoredItem],
+        content_type: str | None = None,
+        excluded_genres: list[int] | None = None,
     ) -> UserTasteProfile:
         """
         Aggregates multiple item vectors into a single User Taste Profile.
@@ -76,7 +79,7 @@ async def build_user_profile(
             # Scale by Interest Score (0.0 - 1.0)
             interest_weight = item.score / 100.0
 
-            self._merge_vector(profile_data, item_vector, interest_weight)
+            self._merge_vector(profile_data, item_vector, interest_weight, excluded_genres)
 
         # Convert to Pydantic Model
         profile = UserTasteProfile(
@@ -206,7 +209,13 @@ def _vectorize_item(self, meta: dict) -> dict[str, list[int] | int | list[str] |
 
         return vector
 
-    def _merge_vector(self, profile: dict, item_vector: dict, weight: float):
+    def _merge_vector(
+        self,
+        profile: dict,
+        item_vector: dict,
+        weight: float,
+        excluded_genres: list[int] | None = None,
+    ):
         """Merges an item's sparse vector into the main profile with a weight."""
 
         # Weights for specific dimensions (Feature Importance)
@@ -228,6 +237,8 @@ def _merge_vector(self, profile: dict, item_vector: dict, weight: float):
                     profile["years"][ids] += final_weight
             elif ids:
                 for feature_id in ids:
+                    if dim == "genres" and excluded_genres and feature_id in excluded_genres:
+                        continue
                     profile[dim][feature_id] += final_weight
 
     async def _fetch_full_metadata(self, tmdb_id: int, type_: str) -> dict | None:
diff --git a/static/index.html b/static/index.html
@@ -208,6 +208,31 @@
 
                 <div class="border-t border-slate-800"></div>
 
+                <!-- Genre Exclusion -->
+                <div class="space-y-6">
+                    <label class="block text-sm font-medium text-slate-400 uppercase tracking-wider">Exclude Genres</label>
+
+                    <div class="grid md:grid-cols-2 gap-6">
+                        <!-- Movie Genres -->
+                        <div class="space-y-3">
+                            <label class="block text-xs font-semibold text-slate-500 uppercase">Movies</label>
+                            <div id="movieGenreList" class="h-48 overflow-y-auto pr-2 space-y-2 custom-scrollbar bg-slate-800/50 rounded-xl p-3 border border-slate-700/50">
+                                <!-- Populated by JS -->
+                            </div>
+                        </div>
+
+                        <!-- Series Genres -->
+                        <div class="space-y-3">
+                            <label class="block text-xs font-semibold text-slate-500 uppercase">Series</label>
+                            <div id="seriesGenreList" class="h-48 overflow-y-auto pr-2 space-y-2 custom-scrollbar bg-slate-800/50 rounded-xl p-3 border border-slate-700/50">
+                                <!-- Populated by JS -->
+                            </div>
+                        </div>
+                    </div>
+                </div>
+
+                <div class="border-t border-slate-800"></div>
+
                 <!-- RPDB API Key -->
                 <div class="space-y-4">
                     <label class="block text-sm font-medium text-slate-400 uppercase tracking-wider">RPDB API Key
diff --git a/static/script.js b/static/script.js