Skip to content

Commit 22276b1

Browse files
committed
replace Ollama with VLLM
1 parent fea727e commit 22276b1

29 files changed

+1818
-1841
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ See the [Features](#features) section to learn more about the various functional
4646
- 🔓 Free open source software
4747

4848
## Quick start
49+
4950
The best way to get started is to watch our [Tutorial Video Series](https://www.youtube.com/watch?v=_ft3RvKhyXM&list=PLVRi8E_6wxgQV4MOAMIkRhhnjCC_JqvTb), read the [User Guide](https://github.com/uhh-lt/dats/wiki/User-Guide), and play with DATS on our [Demo Instance](https://dats.ltdemos.informatik.uni-hamburg.de/).
5051

5152
<details>
@@ -75,7 +76,7 @@ git clone https://github.com/uhh-lt/dats.git
7576
#### 3. Start docker containers
7677

7778
```bash
78-
docker compose -f compose.ollama.yml up -d
79+
docker compose -f compose.vllm.yml up -d
7980
```
8081

8182
```bash

backend/.env.example

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ WEAVIATE_GRPC_PORT=13134
7474
RAY_HOST=localhost
7575
RAY_PORT=13130
7676

77-
OLLAMA_HOST=localhost
78-
OLLAMA_PORT=13133
79-
OLLAMA_LLM_MODEL=gemma3:27b
80-
OLLAMA_VLM_MODEL=gemma3:27b
77+
VLLM_LLM_MODEL=gemma-3-27b
78+
VLLM_LLM_HOST=localhost
79+
VLLM_LLM_PORT=13133
80+
VLLM_VLM_HOST=localhost
81+
VLLM_VLM_PORT=13133
82+
VLLM_VLM_MODEL=gemma-3-27b
83+
VLLM_EMB_HOST=localhost
84+
VLLM_EMB_PORT=13137
85+
VLLM_EMB_MODEL=snowflake-arctic-embed-l-v2.0

backend/configs/development.yaml

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,26 +97,19 @@ elasticsearch:
9797
sniff_on_connection_fail: False
9898
sniffer_timeout: 120
9999

100-
ollama:
101-
host: ${oc.env:OLLAMA_HOST, localhost}
102-
port: ${oc.env:OLLAMA_PORT, 13133}
100+
vllm:
103101
llm:
104-
model: ${oc.env:OLLAMA_LLM_MODEL, gemma3:27b}
105-
default_params:
106-
num_ctx: 16384
102+
host: ${oc.env:VLLM_LLM_HOST, vllm-gemma}
103+
port: ${oc.env:VLLM_LLM_PORT, 8000}
104+
model: ${oc.env:VLLM_LLM_MODEL, gemma-3-27b}
107105
vlm:
108-
model: ${oc.env:OLLAMA_VLM_MODEL, gemma3:27b}
109-
default_params:
110-
num_ctx: 16384
111-
# temperature: 0.0
112-
# seed: 1337
113-
# num_predict: 8192
114-
# top_p: 0.9
115-
# top_k: 40
116-
# repetition_penalty: 1.1
106+
host: ${oc.env:VLLM_VLM_HOST, vllm-gemma}
107+
port: ${oc.env:VLLM_VLM_PORT, 8000}
108+
model: ${oc.env:VLLM_VLM_MODEL, gemma-3-27b}
117109
emb:
118-
model: ${oc.env:OLLAMA_EMB_MODEL, snowflake-arctic-embed2:568m}
119-
default_params:
110+
host: ${oc.env:VLLM_EMB_HOST, vllm-emb}
111+
port: ${oc.env:VLLM_EMB_PORT, 8000}
112+
model: ${oc.env:VLLM_EMB_MODEL, snowflake-arctic-embed-l-v2.0}
120113

121114
llm_assistant:
122115
sentence_annotation:

backend/configs/production.yaml

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,26 +97,19 @@ elasticsearch:
9797
sniff_on_connection_fail: False
9898
sniffer_timeout: 120
9999

100-
ollama:
101-
host: ${oc.env:OLLAMA_HOST, ollama}
102-
port: ${oc.env:OLLAMA_PORT, 11434}
100+
vllm:
103101
llm:
104-
model: ${oc.env:OLLAMA_LLM_MODEL, gemma3:27b}
105-
default_params:
106-
num_ctx: 16384
102+
host: ${oc.env:VLLM_LLM_HOST, vllm-gemma}
103+
port: ${oc.env:VLLM_LLM_PORT, 8000}
104+
model: ${oc.env:VLLM_LLM_MODEL, gemma-3-27b}
107105
vlm:
108-
model: ${oc.env:OLLAMA_VLM_MODEL, gemma3:27b}
109-
default_params:
110-
num_ctx: 16384
111-
# temperature: 0.0
112-
# seed: 1337
113-
# num_predict: 8192
114-
# top_p: 0.9
115-
# top_k: 40
116-
# repetition_penalty: 1.1
106+
host: ${oc.env:VLLM_VLM_HOST, vllm-gemma}
107+
port: ${oc.env:VLLM_VLM_PORT, 8000}
108+
model: ${oc.env:VLLM_VLM_MODEL, gemma-3-27b}
117109
emb:
118-
model: ${oc.env:OLLAMA_EMB_MODEL, snowflake-arctic-embed2:568m}
119-
default_params:
110+
host: ${oc.env:VLLM_EMB_HOST, vllm-emb}
111+
port: ${oc.env:VLLM_EMB_PORT, 8000}
112+
model: ${oc.env:VLLM_EMB_MODEL, snowflake-arctic-embed-l-v2.0}
120113

121114
llm_assistant:
122115
sentence_annotation:

backend/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ dependencies = [
4040
"matplotlib==3.7.1",
4141
"multiprocess==0.70.15",
4242
"networkx==3.2.1",
43-
"ollama==0.4.7",
4443
"omegaconf==2.3.0",
44+
"openai>=1.101.0",
4545
"pandas==2.1.4",
4646
"passlib==1.7",
4747
"pillow==10.0.0",

backend/src/core/memo/memo_endpoint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
MemoRead,
1717
MemoUpdate,
1818
)
19-
from core.memo.memo_generation_service import generate_memo_ollama
19+
from core.memo.memo_generation_service import generate_memo_llm
2020
from core.memo.memo_utils import get_object_memo_for_user, get_object_memos
2121

2222
router = APIRouter(
@@ -214,4 +214,4 @@ def generate_memo_suggestion(
214214

215215
authz_user.assert_in_project(project_id=proj_id)
216216

217-
return generate_memo_ollama(attached_object, db)
217+
return generate_memo_llm(attached_object, db)

backend/src/core/memo/memo_generation_service.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
IMG_CAPTION_USER_PROMPT,
1818
)
1919
from repos.filesystem_repo import FilesystemRepo
20-
from repos.ollama_repo import OllamaRepo
20+
from repos.llm_repo import LLMRepo
2121
from utils.image_utils import image_to_base64, load_image
2222

2323

24-
class OllamaMemoResult(BaseModel):
24+
class LLMMemoResult(BaseModel):
2525
memo: str
2626

2727

@@ -135,7 +135,7 @@ def summarize_span_group(
135135
MEMO_GEN_PROMPT = "Don't use imperative form. Generate a concise, 1-2 sentence helpful memo about the following object:\n\n{obj_summary}"
136136

137137

138-
def generate_memo_ollama(
138+
def generate_memo_llm(
139139
obj: (
140140
SourceDocumentORM
141141
| TagORM
@@ -149,7 +149,7 @@ def generate_memo_ollama(
149149
db: Session,
150150
) -> str:
151151
# 1. Update job description
152-
msg = "Started Memo Generation (OLLAMA)"
152+
msg = "Started Memo Generation (LLM)"
153153
logger.info(msg)
154154
# 2. Build the prompt for the memo suggestion
155155
summary_fn = SUMMARY_FUNCTIONS.get(type(obj))
@@ -158,17 +158,17 @@ def generate_memo_ollama(
158158
else:
159159
raise NotImplementedError(f"AttachedObjectType is not supported: {type(obj)}")
160160

161-
# 3. Send to Ollama for processing
161+
# 3. Send to LLM for processing
162162
if isImage:
163-
caption, _ = OllamaRepo().vlm_chat(
163+
caption, _ = LLMRepo().vlm_chat(
164164
user_prompt=IMG_CAPTION_USER_PROMPT, b64_images=[obj_summary]
165165
)
166166
return caption.strip()
167167
else:
168-
response = OllamaRepo().llm_chat(
168+
response = LLMRepo().llm_chat(
169169
system_prompt="You are a helpful assistant generating memos.",
170170
user_prompt=MEMO_GEN_PROMPT.format(obj_summary=obj_summary),
171-
response_model=OllamaMemoResult,
171+
response_model=LLMMemoResult,
172172
)
173173
logger.info(f"Got chat response for object ID {obj.id}! Response={response}")
174174

backend/src/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from modules.crawler.crawler_exceptions import NoDataToCrawlError
2121
from repos.elastic.elastic_crud_base import NoSuchObjectInElasticSearchError
2222
from repos.elastic.elastic_repo import ElasticSearchRepo
23-
from repos.ollama_repo import OllamaRepo
23+
from repos.llm_repo import LLMRepo
2424
from utils.import_utils import import_by_suffix
2525

2626
#####################################################################################################################
@@ -76,7 +76,7 @@ async def lifespan(app: FastAPI):
7676
logger.info("Stopping Discourse Analysis Tool Suite FastAPI!")
7777
FilesystemRepo().purge_temporary_files()
7878
# Close repo connections
79-
OllamaRepo().close_connection()
79+
LLMRepo().close_connection()
8080
ElasticSearchRepo().close_connection()
8181

8282

backend/src/modules/doc_processing/image/image_caption_job.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
1111
IMG_CAPTION_USER_PROMPT,
1212
)
1313
from repos.db.sql_repo import SQLRepo
14-
from repos.ollama_repo import OllamaRepo
14+
from repos.llm_repo import LLMRepo
1515
from systems.job_system.job_dto import Job, JobOutputBase
1616
from systems.job_system.job_register_decorator import register_job
1717
from utils.image_utils import image_to_base64, load_image
1818

19-
ollama = OllamaRepo()
19+
llm = LLMRepo()
2020
sqlr = SQLRepo()
2121

2222

@@ -40,7 +40,7 @@ def handle_image_caption_job(
4040
) -> ImageCaptionJobOutput:
4141
image = load_image(payload.filepath)
4242
image_b64 = image_to_base64(image)
43-
caption, _ = ollama.vlm_chat(
43+
caption, _ = llm.vlm_chat(
4444
user_prompt=IMG_CAPTION_USER_PROMPT, b64_images=[image_b64]
4545
)
4646

backend/src/modules/llm_assistant/llm_service.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -55,24 +55,24 @@
5555
)
5656
from modules.llm_assistant.prompts.annotation_prompt_builder import (
5757
AnnotationPromptBuilder,
58-
OllamaAnnotationResults,
58+
LLMAnnotationResults,
5959
)
6060
from modules.llm_assistant.prompts.metadata_prompt_builder import (
61+
LLMMetadataExtractionResults,
6162
MetadataPromptBuilder,
62-
OllamaMetadataExtractionResults,
6363
)
6464
from modules.llm_assistant.prompts.prompt_builder import PromptBuilder
6565
from modules.llm_assistant.prompts.sentence_annotation_prompt_builder import (
66-
OllamaSentenceAnnotationResults,
66+
LLMSentenceAnnotationResults,
6767
SentenceAnnotationPromptBuilder,
6868
)
6969
from modules.llm_assistant.prompts.tagging_prompt_builder import (
70-
OllamaTaggingResult,
70+
LLMTaggingResult,
7171
TaggingPromptBuilder,
7272
)
7373
from ray_model_worker.dto.seqsenttagger import SeqSentTaggerDoc, SeqSentTaggerJobInput
7474
from repos.db.sql_repo import SQLRepo
75-
from repos.ollama_repo import OllamaRepo
75+
from repos.llm_repo import LLMRepo
7676
from repos.ray_repo import RayRepo
7777
from repos.vector.weaviate_repo import WeaviateRepo
7878
from systems.job_system.job_dto import Job
@@ -83,7 +83,7 @@
8383
class LLMAssistantService(metaclass=SingletonMeta):
8484
def __new__(cls, *args, **kwargs):
8585
cls.sqlr: SQLRepo = SQLRepo()
86-
cls.ollama: OllamaRepo = OllamaRepo()
86+
cls.llm: LLMRepo = LLMRepo()
8787
cls.ray: RayRepo = RayRepo()
8888
cls.weaviate: WeaviateRepo = WeaviateRepo()
8989

@@ -439,10 +439,10 @@ def _llm_tagging(
439439
)
440440

441441
# prompt the model
442-
response = self.ollama.llm_chat(
442+
response = self.llm.llm_chat(
443443
system_prompt=system_prompt,
444444
user_prompt=user_prompt,
445-
response_model=OllamaTaggingResult,
445+
response_model=LLMTaggingResult,
446446
)
447447
logger.info(
448448
f"Got chat response! Tags={response.categories}, Reason={response.reasoning}"
@@ -570,10 +570,10 @@ def _llm_metadata_extraction(
570570
)
571571

572572
# prompt the model
573-
response = self.ollama.llm_chat(
573+
response = self.llm.llm_chat(
574574
system_prompt=system_prompt,
575575
user_prompt=user_prompt,
576-
response_model=OllamaMetadataExtractionResults,
576+
response_model=LLMMetadataExtractionResults,
577577
)
578578
logger.info(f"Got chat response! Response={response.data}")
579579

@@ -705,10 +705,10 @@ def _llm_annotation(
705705
)
706706

707707
# prompt the model
708-
response = self.ollama.llm_chat(
708+
response = self.llm.llm_chat(
709709
system_prompt=system_prompt,
710710
user_prompt=user_prompt,
711-
response_model=OllamaAnnotationResults,
711+
response_model=LLMAnnotationResults,
712712
)
713713
logger.info(f"Got chat response! Response={response}")
714714

@@ -814,7 +814,7 @@ def _llm_sentence_annotation(
814814
), "Wrong approach parameters!"
815815
is_fewshot = isinstance(approach_parameters, FewShotParams)
816816

817-
msg = f"Started LLMJob - Sentence Annotation (OLLAMA), num docs: {len(task_parameters.sdoc_ids)}"
817+
msg = f"Started LLMJob - Sentence Annotation (LLM), num docs: {len(task_parameters.sdoc_ids)}"
818818
self._update_llm_job_description(
819819
job=job,
820820
description=msg,
@@ -901,10 +901,10 @@ def _llm_sentence_annotation(
901901
)
902902

903903
# prompt the model
904-
response = self.ollama.llm_chat(
904+
response = self.llm.llm_chat(
905905
system_prompt=system_prompt,
906906
user_prompt=user_prompt,
907-
response_model=OllamaSentenceAnnotationResults,
907+
response_model=LLMSentenceAnnotationResults,
908908
)
909909
logger.info(f"Got chat response! Response={response}")
910910

0 commit comments

Comments
 (0)