Commit 24a5b7d — Merge pull request #475 from sudoleg/copilot/add-ollama-llm-provider

Add Ollama provider option for LLMs and embeddings

2 parents 95b0488 + 6390da3

File tree: 11 files changed, +380 −92 lines changed

README.md — 10 additions, 3 deletions

@@ -6,7 +6,9 @@
 ## Features :sparkles:
 
-YouTubeGPT lets you **summarize and chat (Q&A)** with YouTube videos. Its features include:
+YouTubeGPT is a web app that can be run fully locally and lets you **summarize and chat (Q&A)** with YouTube videos. You can either use OpenAI's API or a (local) Ollama instance.
+
+YouTubeGPT's features include:
 
 ### :writing_hand: Provide a custom prompt for summaries [**VIEW DEMO**](https://youtu.be/rJqx3qvebws)
 
@@ -21,8 +23,9 @@
 - the summaries and answers can be saved to a library accessible at a separate page!
 - additionally, summaries and answers can be exported/downloaded as Markdown files!
 
-### :robot: Choose from different OpenAI models
+### :robot: Choose provider and models
 
+- choose between OpenAI's API or a (local) Ollama instance
 - currently available: ChatGPT 4-5 (incl. nano & mini) and *continuously updated* with new models
 - by choosing a different model, you can summarize even longer videos and get better responses
 
@@ -36,7 +39,11 @@
 ## Installation & usage
 
-No matter how you choose to run the app, you will first need to get an OpenAI API-Key. This is very straightforward and free. Have a look at [their instructions](https://platform.openai.com/docs/quickstart/account-setup) to get started.
+If you want to use OpenAI's API, you will first need to get an OpenAI API key. This is very straightforward and free. Have a look at [their instructions](https://platform.openai.com/docs/quickstart/account-setup) to get started.
+
+If you want to use Ollama, you need an Ollama server running locally or remotely. You can download Ollama for macOS, Linux, or Windows [on their website](https://ollama.com/download). Make sure the server is reachable on the default port `11434`, or set the `OLLAMA_HOST` environment variable to point to your Ollama server. You also need to **pull the models** you want to use.
+
+> **Note**: Ollama limits the context window to 4k tokens by default. I strongly recommend adjusting it to at least 16k tokens. This can be done in the Ollama app settings.
 
 ### Run with Docker
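The README's host-resolution rule (default port `11434`, overridable via `OLLAMA_HOST`) can be sketched in a few lines of Python. This is illustrative only — the function names here are hypothetical, not part of the app — and the probe assumes Ollama's behavior of answering a plain GET on its root URL when running:

```python
import os
import urllib.request


def resolve_ollama_host() -> str:
    """Read OLLAMA_HOST, falling back to Ollama's default local endpoint."""
    return os.getenv("OLLAMA_HOST", "http://localhost:11434")


def ollama_reachable(host: str, timeout: float = 2.0) -> bool:
    """Probe the server root; a running Ollama instance answers HTTP 200."""
    try:
        with urllib.request.urlopen(host, timeout=timeout) as resp:
            return resp.status == 200
    except OSError:
        # Covers connection refused, DNS failure, and timeouts.
        return False
```

The same env-var fallback is what the app's `get_ollama_host()` helper (added in `modules/helpers.py` below) implements.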

docker-compose.yml — 13 additions, 12 deletions

@@ -28,24 +28,25 @@ services:
       # replace with your OpenAI API key or the name of the environment
       # variable that stores it on your PC
       - OPENAI_API_KEY=${OPENAI_YOUTUBEGPT_API_KEY}
+      - OLLAMA_HOST=http://host.docker.internal:11434
     ports:
       - "8501:8501"
     networks:
       - net
 
-  ollama:
-    image: ollama/ollama:0.13.5
-    container_name: ollama
-    volumes:
-      - ollama:/root/.ollama
-    ports:
-      - "11434:11434"
-    restart: unless-stopped
-    networks:
-      - net
+  # ollama:
+  #   image: ollama/ollama:0.13.3
+  #   container_name: ollama
+  #   volumes:
+  #     - ollama:/root/.ollama
+  #   ports:
+  #     - "11434:11434"
+  #   restart: unless-stopped
+  #   networks:
+  #     - net
 
 volumes:
   chroma:
     driver: local
-  ollama:
-    driver: local
+  # ollama:
+  #   driver: local

modules/helpers.py — 67 additions, 8 deletions

@@ -3,8 +3,9 @@
 import os
 import re
 from pathlib import Path
-from typing import List, Literal
+from typing import List, Literal, Optional
 
+import ollama
 import openai
 import streamlit as st
 import tiktoken
@@ -68,13 +69,17 @@ def get_available_models(
         get_default_config_value(f"available_models.{model_type}")
     )
 
+    def _filter_available(models: List[str]) -> List[str]:
+        return [m for m in selectable_model_ids if m in models]
+
+    if not api_key and not os.getenv("OPENAI_API_KEY"):
+        return selectable_model_ids
+
     # AVAILABLE_MODEL_IDS env var stores all the model IDs available to the user as a list (separated by a comma)
     # the env var is set programmatically below
     available_model_ids = os.getenv("AVAILABLE_MODEL_IDS")
     if available_model_ids:
-        return filter(
-            lambda m: m in available_model_ids.split(","), selectable_model_ids
-        )
+        return _filter_available(available_model_ids.split(","))
 
     try:
         available_model_ids: list = [model.id for model in openai.models.list()]
@@ -94,7 +99,7 @@ def get_available_models(
     # set the AVAILABLE_MODEL_IDS env var, so that the list of available models
     # doesn't have to be fetched every time
     os.environ["AVAILABLE_MODEL_IDS"] = ",".join(available_model_ids)
-    return filter(lambda m: m in available_model_ids, selectable_model_ids)
+    return _filter_available(available_model_ids)
 
 
 def get_default_config_value(
@@ -203,14 +208,16 @@ def save_response_as_file(
     logging.info("File saved at: %s", file_path)
 
 
-def get_preffered_languages():
+def get_preferred_languages():
+    """Return preferred languages for transcripts."""
     # TODO: return from configuration object or config.json
     return ["en-US", "en", "de"]
 
 
-def num_tokens_from_string(string: str, model: str = "gpt-4o-mini") -> int:
+# TODO: handle Ollama models as well or fall back to another token-counting method
+def num_tokens_from_string(string: str, model: str = "gpt-4.1-nano") -> int:
     """
-    Returns the number of tokens in a text string.
+    Returns the number of tokens in a text string for OpenAI models.
 
     Args:
         string (str): The string to count tokens in.
@@ -238,3 +245,55 @@ def is_environment_prod():
     if os.getenv("ENVIRONMENT") == "production":
         return True
     return False
+
+
+def get_ollama_host() -> str:
+    """Return the configured Ollama host."""
+    return os.getenv("OLLAMA_HOST", "http://localhost:11434")
+
+
+def is_ollama_available(host: Optional[str] = None) -> bool:
+    """Checks whether an Ollama server is reachable."""
+    ollama_host = host or get_ollama_host()
+    try:
+        ollama.Client(host=ollama_host).list()
+    except Exception as e:
+        logging.error("Ollama connection check failed: %s", str(e))
+        return False
+    return True
+
+
+def _is_embedding_model(model: dict) -> bool:
+    """Determine whether an Ollama model is an embedding model."""
+    details = model.get("details", {})
+    family = details.get("family", "").lower()
+    model_type = details.get("model_type", "").lower()
+    name = model.get("model", "").lower()
+    return "embed" in family or "embedding" in model_type or "embed" in name
+
+
+def get_ollama_models(
+    model_type: Literal["gpts", "embeddings"], host: Optional[str] = None
+) -> List[str]:
+    """Returns available Ollama models filtered by type."""
+    ollama_host = host or get_ollama_host()
+    try:
+        models = ollama.Client(host=ollama_host).list().get("models", [])
+    except Exception as e:
+        logging.error("Could not list Ollama models: %s", str(e))
+        return []
+
+    if model_type == "embeddings":
+        return [model["model"] for model in models if _is_embedding_model(model)]
+    return [model["model"] for model in models if not _is_embedding_model(model)]
+
+
+def pull_ollama_model(model_name: str, host: Optional[str] = None) -> bool:
+    """Triggers pulling an Ollama model; returns True on success."""
+    ollama_host = host or get_ollama_host()
+    try:
+        ollama.Client(host=ollama_host).pull(model=model_name, stream=False)
+    except Exception as e:
+        logging.error("Failed to pull Ollama model %s: %s", model_name, str(e))
+        return False
+    return True
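Two small pieces of logic in this file are worth seeing in isolation: the order-preserving model filter (which keeps the curated ordering from the app config rather than the server's ordering) and the heuristic that classifies an Ollama model as an embedding model. A minimal, dependency-free sketch — the names mirror the diff, but this standalone version is illustrative:

```python
from typing import List


def filter_available(selectable_model_ids: List[str], models: List[str]) -> List[str]:
    # Preserve the config's curated ordering; drop anything the
    # account or server does not actually offer.
    return [m for m in selectable_model_ids if m in models]


def is_embedding_model(model: dict) -> bool:
    # Heuristic from the diff: look for "embed" in the family,
    # model_type, or model name reported by the Ollama API.
    details = model.get("details", {})
    family = details.get("family", "").lower()
    model_type = details.get("model_type", "").lower()
    name = model.get("model", "").lower()
    return "embed" in family or "embedding" in model_type or "embed" in name
```

Note that replacing the old lazy `filter(...)` with a list comprehension also fixes a subtle bug: the previous code returned a one-shot iterator, which Streamlit widgets could exhaust on a rerun.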

modules/summary.py — 25 additions, 15 deletions

@@ -1,7 +1,8 @@
 import logging
 
+import ollama
 from langchain.messages import HumanMessage, SystemMessage
-from langchain_openai import ChatOpenAI
+from langchain_core.language_models import BaseChatModel
 
 from .helpers import num_tokens_from_string, read_file
 
@@ -10,7 +11,7 @@
 USER_PROMPT_TEMPLATE = read_file("prompts/summary_user_prompt.txt")
 
 # info about OpenAI's GPTs context windows: https://platform.openai.com/docs/models
-CONTEXT_WINDOWS = {
+OPENAI_CONTEXT_WINDOWS = {
     "gpt-3.5-turbo": {"total": 16385, "output": 4096},
     "gpt-4": {"total": 8192, "output": 4096},
     "gpt-4-turbo": {"total": 128000, "output": 4096},
@@ -40,13 +41,13 @@ def log_error(self):
         logging.error("Transcript too long for %s.", self.model_name, exc_info=True)
 
 
-def get_transcript_summary(transcript_text: str, llm: ChatOpenAI, **kwargs):
+def get_transcript_summary(transcript_text: str, llm: BaseChatModel, **kwargs):
     """
     Generates a summary from a video transcript using a language model.
 
     Args:
         transcript_text (str): The full transcript text of the video.
-        llm (ChatOpenAI): The language model instance to use for generating the summary.
+        llm (BaseChatModel): The language model instance to use for generating the summary.
         **kwargs: Optional keyword arguments.
             - custom_prompt (str): A custom prompt to replace the default summary request.
 
@@ -68,30 +69,39 @@ def get_transcript_summary(transcript_text: str, llm: BaseChatModel, **kwargs):
         ---
         {transcript_text}
         ---
-        """
-
+        """
     else:
         user_prompt = USER_PROMPT_TEMPLATE.format(transcript_text=transcript_text)
 
+    if llm.name not in OPENAI_CONTEXT_WINDOWS.keys():
+        model_details = ollama.show(model=llm.name)
+        model_info = model_details.get("modelinfo", {})
+        general_arch = model_info.get("general.architecture", "")
+        max_context_length = model_info.get(f"{general_arch}.context_length", 4096)
+    else:
+        max_context_length = OPENAI_CONTEXT_WINDOWS[llm.name]["total"]
+
     # if the number of tokens in the transcript (plus the number of tokens in the prompt) exceeds the model's context window, an exception is raised
-    if (
-        num_tokens_from_string(string=user_prompt, model=llm.model_name)
-        + num_tokens_from_string(string=SYSTEM_PROMPT, model=llm.model_name)
-        > CONTEXT_WINDOWS[llm.model_name]["total"]
-    ):
+    total_tokens = num_tokens_from_string(
+        string=user_prompt, model=llm.name
+    ) + num_tokens_from_string(string=SYSTEM_PROMPT, model=llm.name)
+    if total_tokens > max_context_length:
         raise TranscriptTooLongForModelException(
-            message=f"Your transcript exceeds the context window of the chosen model ({llm.model_name}), which is {CONTEXT_WINDOWS[llm.model_name]['total']} tokens. "
+            message=f"Your transcript exceeds the context window of the chosen model ({llm.name}), which is {max_context_length} tokens. "
             "Consider the following options:\n"
-            "1. Choose another model with larger context window (such as gpt-4o).\n"
+            "1. Choose another model with a larger context window.\n"
             "2. Use the 'Chat' feature to ask specific questions about the video. There you won't be limited by the number of tokens.\n\n"
-            "You can get more information on context windows for different models in the [official OpenAI documentation about models](https://platform.openai.com/docs/models).",
-            model_name=llm.model_name,
+            "You can get more information on context windows for different OpenAI models in the [official documentation](https://platform.openai.com/docs/models).",
+            model_name=llm.name,
         )
 
     messages = [
         SystemMessage(content=SYSTEM_PROMPT),
         HumanMessage(content=user_prompt),
     ]
 
+    logging.info(
+        "Generating summary using model: %s. Total tokens: %d", llm.name, total_tokens
+    )
     response = llm.invoke(messages)
     return response.content
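The context-window guard introduced here reduces to: resolve the model's maximum context (static table for OpenAI models, an `ollama.show` lookup otherwise), then compare it against the combined token count of the system and user prompts. A provider-agnostic sketch with a crude stand-in token counter — the real code uses tiktoken and the Ollama client, and `fits_context` is a hypothetical name:

```python
# Subset of the table from modules/summary.py.
OPENAI_CONTEXT_WINDOWS = {"gpt-4": {"total": 8192}, "gpt-4-turbo": {"total": 128000}}


def count_tokens(text: str) -> int:
    # Crude stand-in for tiktoken: one "token" per whitespace-separated word.
    return len(text.split())


def fits_context(model: str, system_prompt: str, user_prompt: str,
                 non_openai_context: int = 4096) -> bool:
    if model in OPENAI_CONTEXT_WINDOWS:
        max_context = OPENAI_CONTEXT_WINDOWS[model]["total"]
    else:
        # The diff reads this limit from `ollama.show` for Ollama models;
        # here we just take a caller-supplied fallback (4096 mirrors the
        # diff's default).
        max_context = non_openai_context
    return count_tokens(system_prompt) + count_tokens(user_prompt) <= max_context
```

When this check fails, the real code raises `TranscriptTooLongForModelException` instead of returning `False`.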

modules/ui.py — 42 additions, 4 deletions

@@ -5,15 +5,19 @@
 from modules.helpers import (
     get_available_models,
     get_default_config_value,
+    get_ollama_models,
     is_api_key_set,
     is_api_key_valid,
+    is_ollama_available,
 )
 
 GENERAL_ERROR_MESSAGE = "An unexpected error occurred. If you are a developer and run the app locally, you can view the logs to see details about the error."
 
 
 def display_api_key_warning():
     """Checks whether an API key is provided and displays warning if not."""
+    if st.session_state.get("llm_provider", "OpenAI") == "Ollama":
+        return
     if not is_api_key_set():
         st.warning(
             """:warning: It seems you haven't provided an API key yet. Make sure to do so by providing it in the settings (sidebar)
@@ -58,18 +62,50 @@ def display_model_settings_sidebar():
     For example here, the selectbox for model has the key 'model'.
     Thus the selected model can be accessed via st.session_state.model.
     """
+    if "llm_provider" not in st.session_state:
+        st.session_state.llm_provider = "OpenAI"
     if "model" not in st.session_state:
         st.session_state.model = get_default_config_value("default_model.gpt")
 
     with st.sidebar:
         st.header("Model settings")
+        provider = st.selectbox(
+            label="Select provider",
+            options=["OpenAI", "Ollama"],
+            key="llm_provider",
+        )
+        available_models = []
+        if provider == "Ollama":
+            ollama_ready = is_ollama_available()
+            if not ollama_ready:
+                st.warning(
+                    "Ollama server not reachable. Ensure it is running locally on port 11434 or set the host via the `OLLAMA_HOST` environment variable."
+                )
+            available_models = (
+                get_ollama_models(model_type="gpts") if ollama_ready else []
+            )
+            if not available_models:
+                st.warning(
+                    "No Ollama models available. Pull a model in your terminal before proceeding."
+                )
+        else:
+            if is_api_key_set():
+                available_models = get_available_models(
+                    model_type="gpts", api_key=st.session_state.openai_api_key
+                )
+            else:
+                available_models = []
+        if available_models and st.session_state.model not in available_models:
+            st.session_state.model = available_models[0]
+        model_options = (
+            available_models if available_models else [st.session_state.model]
+        )
         model = st.selectbox(
             label="Select a large language model",
-            options=get_available_models(
-                model_type="gpts", api_key=st.session_state.openai_api_key
-            ),
+            options=model_options,
             key="model",
             help=get_default_config_value("help_texts.model"),
+            disabled=not available_models,
         )
         st.slider(
             label="Adjust temperature",
@@ -93,7 +129,9 @@ def display_model_settings_sidebar():
         st.warning(
             "OpenAI generally recommends altering temperature or top_p but not both. See their [API reference](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature)"
         )
-        if model != get_default_config_value("default_model.gpt"):
+        if provider == "OpenAI" and model != get_default_config_value(
+            "default_model.gpt"
+        ):
             st.warning(
                 """:warning: Be aware of the higher costs and latencies when using more advanced (reasoning) models (like gpt-5). You can see details (incl. costs) about the models and compare them [here](https://platform.openai.com/docs/models/compare)."""
             )

modules/youtube.py — 2 additions, 2 deletions

@@ -6,7 +6,7 @@
 from youtube_transcript_api import CouldNotRetrieveTranscript, YouTubeTranscriptApi
 from youtube_transcript_api.formatters import TextFormatter
 
-from .helpers import extract_youtube_video_id, get_preffered_languages
+from .helpers import extract_youtube_video_id, get_preferred_languages
 
 OEMBED_PROVIDER = "https://noembed.com/embed"
 
@@ -66,7 +66,7 @@ def fetch_youtube_transcript(url: str):
     try:
         transcript = YouTubeTranscriptApi().fetch(
-            video_id, languages=get_preffered_languages()
+            video_id, languages=get_preferred_languages()
         )
     except CouldNotRetrieveTranscript as e:
         logging.error("Failed to retrieve transcript for URL: %s", str(e))

0 commit comments