Skip to content

Commit 22276b1

Browse files
committed
replace Ollama with VLLM
1 parent fea727e commit 22276b1

29 files changed

+1818
-1841
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ See the [Features](#features) section to learn more about the various functional
4646
- 🔓 Free open source software
4747

4848
## Quick start
49+
4950
The best way to get started is to watch our [Tutorial Video Series](https://www.youtube.com/watch?v=_ft3RvKhyXM&list=PLVRi8E_6wxgQV4MOAMIkRhhnjCC_JqvTb), read the [User Guide](https://github.com/uhh-lt/dats/wiki/User-Guide), and play with DATS on our [Demo Instance](https://dats.ltdemos.informatik.uni-hamburg.de/).
5051

5152
<details>
@@ -75,7 +76,7 @@ git clone https://github.com/uhh-lt/dats.git
7576
#### 3. Start docker containers
7677

7778
```bash
78-
docker compose -f compose.ollama.yml up -d
79+
docker compose -f compose.vllm.yml up -d
7980
```
8081

8182
```bash

backend/.env.example

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ WEAVIATE_GRPC_PORT=13134
7474
RAY_HOST=localhost
7575
RAY_PORT=13130
7676

77-
OLLAMA_HOST=localhost
78-
OLLAMA_PORT=13133
79-
OLLAMA_LLM_MODEL=gemma3:27b
80-
OLLAMA_VLM_MODEL=gemma3:27b
77+
VLLM_LLM_MODEL=gemma-3-27b
78+
VLLM_LLM_HOST=localhost
79+
VLLM_LLM_PORT=13133
80+
VLLM_VLM_HOST=localhost
81+
VLLM_VLM_PORT=13133
82+
VLLM_VLM_MODEL=gemma-3-27b
83+
VLLM_EMB_HOST=localhost
84+
VLLM_EMB_PORT=13137
85+
VLLM_EMB_MODEL=snowflake-arctic-embed-l-v2.0

backend/configs/development.yaml

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,26 +97,19 @@ elasticsearch:
9797
sniff_on_connection_fail: False
9898
sniffer_timeout: 120
9999

100-
ollama:
101-
host: ${oc.env:OLLAMA_HOST, localhost}
102-
port: ${oc.env:OLLAMA_PORT, 13133}
100+
vllm:
103101
llm:
104-
model: ${oc.env:OLLAMA_LLM_MODEL, gemma3:27b}
105-
default_params:
106-
num_ctx: 16384
102+
host: ${oc.env:VLLM_LLM_HOST, vllm-gemma}
103+
port: ${oc.env:VLLM_LLM_PORT, 8000}
104+
model: ${oc.env:VLLM_LLM_MODEL, gemma-3-27b}
107105
vlm:
108-
model: ${oc.env:OLLAMA_VLM_MODEL, gemma3:27b}
109-
default_params:
110-
num_ctx: 16384
111-
# temperature: 0.0
112-
# seed: 1337
113-
# num_predict: 8192
114-
# top_p: 0.9
115-
# top_k: 40
116-
# repetition_penalty: 1.1
106+
host: ${oc.env:VLLM_VLM_HOST, vllm-gemma}
107+
port: ${oc.env:VLLM_VLM_PORT, 8000}
108+
model: ${oc.env:VLLM_VLM_MODEL, gemma-3-27b}
117109
emb:
118-
model: ${oc.env:OLLAMA_EMB_MODEL, snowflake-arctic-embed2:568m}
119-
default_params:
110+
host: ${oc.env:VLLM_EMB_HOST, vllm-emb}
111+
port: ${oc.env:VLLM_EMB_PORT, 8000}
112+
model: ${oc.env:VLLM_EMB_MODEL, snowflake-arctic-embed-l-v2.0}
120113

121114
llm_assistant:
122115
sentence_annotation:

backend/configs/production.yaml

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,26 +97,19 @@ elasticsearch:
9797
sniff_on_connection_fail: False
9898
sniffer_timeout: 120
9999

100-
ollama:
101-
host: ${oc.env:OLLAMA_HOST, ollama}
102-
port: ${oc.env:OLLAMA_PORT, 11434}
100+
vllm:
103101
llm:
104-
model: ${oc.env:OLLAMA_LLM_MODEL, gemma3:27b}
105-
default_params:
106-
num_ctx: 16384
102+
host: ${oc.env:VLLM_LLM_HOST, vllm-gemma}
103+
port: ${oc.env:VLLM_LLM_PORT, 8000}
104+
model: ${oc.env:VLLM_LLM_MODEL, gemma-3-27b}
107105
vlm:
108-
model: ${oc.env:OLLAMA_VLM_MODEL, gemma3:27b}
109-
default_params:
110-
num_ctx: 16384
111-
# temperature: 0.0
112-
# seed: 1337
113-
# num_predict: 8192
114-
# top_p: 0.9
115-
# top_k: 40
116-
# repetition_penalty: 1.1
106+
host: ${oc.env:VLLM_VLM_HOST, vllm-gemma}
107+
port: ${oc.env:VLLM_VLM_PORT, 8000}
108+
model: ${oc.env:VLLM_VLM_MODEL, gemma-3-27b}
117109
emb:
118-
model: ${oc.env:OLLAMA_EMB_MODEL, snowflake-arctic-embed2:568m}
119-
default_params:
110+
host: ${oc.env:VLLM_EMB_HOST, vllm-emb}
111+
port: ${oc.env:VLLM_EMB_PORT, 8000}
112+
model: ${oc.env:VLLM_EMB_MODEL, snowflake-arctic-embed-l-v2.0}
120113

121114
llm_assistant:
122115
sentence_annotation:

backend/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ dependencies = [
4040
"matplotlib==3.7.1",
4141
"multiprocess==0.70.15",
4242
"networkx==3.2.1",
43-
"ollama==0.4.7",
4443
"omegaconf==2.3.0",
44+
"openai>=1.101.0",
4545
"pandas==2.1.4",
4646
"passlib==1.7",
4747
"pillow==10.0.0",

backend/src/core/memo/memo_endpoint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
MemoRead,
1717
MemoUpdate,
1818
)
19-
from core.memo.memo_generation_service import generate_memo_ollama
19+
from core.memo.memo_generation_service import generate_memo_llm
2020
from core.memo.memo_utils import get_object_memo_for_user, get_object_memos
2121

2222
router = APIRouter(
@@ -214,4 +214,4 @@ def generate_memo_suggestion(
214214

215215
authz_user.assert_in_project(project_id=proj_id)
216216

217-
return generate_memo_ollama(attached_object, db)
217+
return generate_memo_llm(attached_object, db)

backend/src/core/memo/memo_generation_service.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
IMG_CAPTION_USER_PROMPT,
1818
)
1919
from repos.filesystem_repo import FilesystemRepo
20-
from repos.ollama_repo import OllamaRepo
20+
from repos.llm_repo import LLMRepo
2121
from utils.image_utils import image_to_base64, load_image
2222

2323

24-
class OllamaMemoResult(BaseModel):
24+
class LLMMemoResult(BaseModel):
2525
memo: str
2626

2727

@@ -135,7 +135,7 @@ def summarize_span_group(
135135
MEMO_GEN_PROMPT = "Don't use imperative form. Generate a concise, 1-2 sentence helpful memo about the following object:\n\n{obj_summary}"
136136

137137

138-
def generate_memo_ollama(
138+
def generate_memo_llm(
139139
obj: (
140140
SourceDocumentORM
141141
| TagORM
@@ -149,7 +149,7 @@ def generate_memo_ollama(
149149
db: Session,
150150
) -> str:
151151
# 1. Update job description
152-
msg = "Started Memo Generation (OLLAMA)"
152+
msg = "Started Memo Generation (LLM)"
153153
logger.info(msg)
154154
# 2. Build the prompt for the memo suggestion
155155
summary_fn = SUMMARY_FUNCTIONS.get(type(obj))
@@ -158,17 +158,17 @@ def generate_memo_ollama(
158158
else:
159159
raise NotImplementedError(f"AttachedObjectType is not supported: {type(obj)}")
160160

161-
# 3. Send to Ollama for processing
161+
# 3. Send to LLM for processing
162162
if isImage:
163-
caption, _ = OllamaRepo().vlm_chat(
163+
caption, _ = LLMRepo().vlm_chat(
164164
user_prompt=IMG_CAPTION_USER_PROMPT, b64_images=[obj_summary]
165165
)
166166
return caption.strip()
167167
else:
168-
response = OllamaRepo().llm_chat(
168+
response = LLMRepo().llm_chat(
169169
system_prompt="You are a helpful assistant generating memos.",
170170
user_prompt=MEMO_GEN_PROMPT.format(obj_summary=obj_summary),
171-
response_model=OllamaMemoResult,
171+
response_model=LLMMemoResult,
172172
)
173173
logger.info(f"Got chat response for object ID {obj.id}! Response={response}")
174174

backend/src/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from modules.crawler.crawler_exceptions import NoDataToCrawlError
2121
from repos.elastic.elastic_crud_base import NoSuchObjectInElasticSearchError
2222
from repos.elastic.elastic_repo import ElasticSearchRepo
23-
from repos.ollama_repo import OllamaRepo
23+
from repos.llm_repo import LLMRepo
2424
from utils.import_utils import import_by_suffix
2525

2626
#####################################################################################################################
@@ -76,7 +76,7 @@ async def lifespan(app: FastAPI):
7676
logger.info("Stopping Discourse Analysis Tool Suite FastAPI!")
7777
FilesystemRepo().purge_temporary_files()
7878
# Close repo connections
79-
OllamaRepo().close_connection()
79+
LLMRepo().close_connection()
8080
ElasticSearchRepo().close_connection()
8181

8282

backend/src/modules/doc_processing/image/image_caption_job.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
1111
IMG_CAPTION_USER_PROMPT,
1212
)
1313
from repos.db.sql_repo import SQLRepo
14-
from repos.ollama_repo import OllamaRepo
14+
from repos.llm_repo import LLMRepo
1515
from systems.job_system.job_dto import Job, JobOutputBase
1616
from systems.job_system.job_register_decorator import register_job
1717
from utils.image_utils import image_to_base64, load_image
1818

19-
ollama = OllamaRepo()
19+
llm = LLMRepo()
2020
sqlr = SQLRepo()
2121

2222

@@ -40,7 +40,7 @@ def handle_image_caption_job(
4040
) -> ImageCaptionJobOutput:
4141
image = load_image(payload.filepath)
4242
image_b64 = image_to_base64(image)
43-
caption, _ = ollama.vlm_chat(
43+
caption, _ = llm.vlm_chat(
4444
user_prompt=IMG_CAPTION_USER_PROMPT, b64_images=[image_b64]
4545
)
4646

backend/src/modules/llm_assistant/llm_service.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -55,24 +55,24 @@
5555
)
5656
from modules.llm_assistant.prompts.annotation_prompt_builder import (
5757
AnnotationPromptBuilder,
58-
OllamaAnnotationResults,
58+
LLMAnnotationResults,
5959
)
6060
from modules.llm_assistant.prompts.metadata_prompt_builder import (
61+
LLMMetadataExtractionResults,
6162
MetadataPromptBuilder,
62-
OllamaMetadataExtractionResults,
6363
)
6464
from modules.llm_assistant.prompts.prompt_builder import PromptBuilder
6565
from modules.llm_assistant.prompts.sentence_annotation_prompt_builder import (
66-
OllamaSentenceAnnotationResults,
66+
LLMSentenceAnnotationResults,
6767
SentenceAnnotationPromptBuilder,
6868
)
6969
from modules.llm_assistant.prompts.tagging_prompt_builder import (
70-
OllamaTaggingResult,
70+
LLMTaggingResult,
7171
TaggingPromptBuilder,
7272
)
7373
from ray_model_worker.dto.seqsenttagger import SeqSentTaggerDoc, SeqSentTaggerJobInput
7474
from repos.db.sql_repo import SQLRepo
75-
from repos.ollama_repo import OllamaRepo
75+
from repos.llm_repo import LLMRepo
7676
from repos.ray_repo import RayRepo
7777
from repos.vector.weaviate_repo import WeaviateRepo
7878
from systems.job_system.job_dto import Job
@@ -83,7 +83,7 @@
8383
class LLMAssistantService(metaclass=SingletonMeta):
8484
def __new__(cls, *args, **kwargs):
8585
cls.sqlr: SQLRepo = SQLRepo()
86-
cls.ollama: OllamaRepo = OllamaRepo()
86+
cls.llm: LLMRepo = LLMRepo()
8787
cls.ray: RayRepo = RayRepo()
8888
cls.weaviate: WeaviateRepo = WeaviateRepo()
8989

@@ -439,10 +439,10 @@ def _llm_tagging(
439439
)
440440

441441
# prompt the model
442-
response = self.ollama.llm_chat(
442+
response = self.llm.llm_chat(
443443
system_prompt=system_prompt,
444444
user_prompt=user_prompt,
445-
response_model=OllamaTaggingResult,
445+
response_model=LLMTaggingResult,
446446
)
447447
logger.info(
448448
f"Got chat response! Tags={response.categories}, Reason={response.reasoning}"
@@ -570,10 +570,10 @@ def _llm_metadata_extraction(
570570
)
571571

572572
# prompt the model
573-
response = self.ollama.llm_chat(
573+
response = self.llm.llm_chat(
574574
system_prompt=system_prompt,
575575
user_prompt=user_prompt,
576-
response_model=OllamaMetadataExtractionResults,
576+
response_model=LLMMetadataExtractionResults,
577577
)
578578
logger.info(f"Got chat response! Response={response.data}")
579579

@@ -705,10 +705,10 @@ def _llm_annotation(
705705
)
706706

707707
# prompt the model
708-
response = self.ollama.llm_chat(
708+
response = self.llm.llm_chat(
709709
system_prompt=system_prompt,
710710
user_prompt=user_prompt,
711-
response_model=OllamaAnnotationResults,
711+
response_model=LLMAnnotationResults,
712712
)
713713
logger.info(f"Got chat response! Response={response}")
714714

@@ -814,7 +814,7 @@ def _llm_sentence_annotation(
814814
), "Wrong approach parameters!"
815815
is_fewshot = isinstance(approach_parameters, FewShotParams)
816816

817-
msg = f"Started LLMJob - Sentence Annotation (OLLAMA), num docs: {len(task_parameters.sdoc_ids)}"
817+
msg = f"Started LLMJob - Sentence Annotation (LLM), num docs: {len(task_parameters.sdoc_ids)}"
818818
self._update_llm_job_description(
819819
job=job,
820820
description=msg,
@@ -901,10 +901,10 @@ def _llm_sentence_annotation(
901901
)
902902

903903
# prompt the model
904-
response = self.ollama.llm_chat(
904+
response = self.llm.llm_chat(
905905
system_prompt=system_prompt,
906906
user_prompt=user_prompt,
907-
response_model=OllamaSentenceAnnotationResults,
907+
response_model=LLMSentenceAnnotationResults,
908908
)
909909
logger.info(f"Got chat response! Response={response}")
910910

0 commit comments

Comments
 (0)