Skip to content

Commit 452ac03

Browse files
committed
rework the pdf parsing logic
1 parent: 30d3fcc; commit: 452ac03

File tree

8 files changed

+394
-402
lines changed

8 files changed

+394
-402
lines changed

compose.yaml

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,14 +53,19 @@ services:
5353
# sleep 5 && \
5454
# ollama pull qwen2.5vl:7b && \
5555
# wait"]
56+
# healthcheck:
57+
# test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:11434/api/tags']
58+
# interval: 10s
59+
# timeout: 5s
60+
# retries: 5
5661

5762
dalai:
5863
build:
5964
context: ./dalai
6065
dockerfile: dev.Dockerfile
6166
environment:
62-
- OLLAMA_URL=http://host.docker.internal:11434
63-
# - OLLAMA_URL=http://ollama:11434
67+
# - OLLAMA_URL=http://host.docker.internal:11434
68+
- OLLAMA_URL=http://ollama:11434
6469
- REDIS_HOST=redis
6570
- REDIS_PORT=6379
6671
- S3_HOST=http://minio:9000

dalai/worker.ts

Lines changed: 49 additions & 358 deletions
Large diffs are not rendered by default.

package-lock.json

Lines changed: 204 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@
109109
"notistack": "^3.0.1",
110110
"openai": "4.102.0",
111111
"pdf-parse-fork": "^1.2.0",
112+
"pdfjs-dist": "^5.4.149",
112113
"pg": "^8.11.3",
113114
"react": "19.1.1",
114115
"react-dom": "19.1.1",

src/server/services/jobs/ingestion.job.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ type IngestionJobData = {
3939
export const submitIngestionJob = async (ragFile: RagFile) => {
4040
const s3Key = FileStore.getRagFileKey(ragFile)
4141
const jobId = getIngestionJobId(ragFile)
42-
console.log(`Submitting Ingestion job ${jobId}`)
4342
const jobData: IngestionJobData = {
4443
type: 'ingestion',
4544
s3Bucket: S3_BUCKET,

0 commit comments

Comments (0)