Skip to content

Commit e162cc9

Browse files
committed
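Fixed media-types: install the `media-types` package in both Docker images and fall back to `application/octet-stream` (instead of `text/plain`) when a file's MIME type cannot be guessed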
1 parent c929501 commit e162cc9

File tree

6 files changed

+39
-10
lines changed

6 files changed

+39
-10
lines changed

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
4545
ffmpeg \
4646
libsm6 \
4747
libxext6 \
48+
media-types \
4849
htop \
4950
vim \
5051
wget \

Dockerfile-gpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
FROM pytorch/pytorch:2.6.0-cuda12.6-cudnn9-devel
22
#FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime
33

4-
RUN apt-get update && apt install ffmpeg libsm6 libxext6 nvtop wget htop vim -y
4+
RUN apt-get update && apt install ffmpeg libsm6 libxext6 media-types nvtop wget htop vim -y
55

66
# Set working directory
77
WORKDIR /app

docker-compose.yaml

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
services:
22

33
fetchcraft-admin:
4-
image: fetchcraft
4+
image: fetchcraft:latest
55
container_name: fetchcraft-admin
66
entrypoint: ["uv", "run", "fetchcraft-admin"]
77
hostname: fetchcraft-admin
@@ -17,7 +17,7 @@ services:
1717
depends_on:
1818
- qdrant
1919
- mongodb
20-
- fetchcraft-docling-server
20+
- pgvector
2121
restart: unless-stopped
2222
networks: [ 'demo-net' ]
2323

@@ -65,7 +65,7 @@ services:
6565
networks: [ 'demo-net' ]
6666

6767
librechat:
68-
image: librechat-new
68+
image: librechat
6969
# image: librechat-dev-alex2
7070
# image: ghcr.io/danny-avila/librechat-dev-api:latest
7171
container_name: librechat
@@ -99,6 +99,7 @@ services:
9999
qdrant:
100100
image: qdrant/qdrant
101101
hostname: fetchcraft-qdrant
102+
container_name: fetchcraft-qdrant
102103
ports:
103104
- "6333:6333"
104105
- "6334:6334"
@@ -170,6 +171,29 @@ services:
170171
# env_file:
171172
# - librechat.env
172173

174+
compass:
175+
image: haohanyang/compass-web
176+
container_name: mongodb-compass
177+
environment:
178+
- CW_MONGO_URI=mongodb://mongodb:27017
179+
depends_on:
180+
- mongodb
181+
ports:
182+
- 9010:8080
183+
networks: [ 'demo-net' ]
184+
185+
pgadmin:
186+
image: dpage/pgadmin4
187+
container_name: pgadmin4_container
188+
ports:
189+
- "9020:80"
190+
environment:
191+
PGADMIN_DEFAULT_EMAIL: alexander.vaagan@crayon.com
192+
PGADMIN_DEFAULT_PASSWORD: strong-password
193+
volumes:
194+
- pgadmin-data:/var/lib/pgadmin
195+
networks: [ 'demo-net' ]
196+
173197
networks:
174198
demo-net:
175199
driver: bridge
@@ -178,4 +202,5 @@ volumes:
178202
mongodb-data:
179203
qdrant-data:
180204
meili_data:
181-
pg-data:
205+
pg-data:
206+
pgadmin-data:

packages/fetchcraft-core/src/fetchcraft/connector/base.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import mimetypes
23
from abc import ABC, abstractmethod
34
from pathlib import Path
@@ -7,6 +8,8 @@
78
import fsspec
89
from pydantic import BaseModel, Field
910

11+
logger = logging.getLogger(__name__)
12+
1013

1114
class Role(BaseModel):
1215
name: str
@@ -24,7 +27,7 @@ class File(BaseModel, ABC):
2427
def __init__(self, path: Path, fs: fsspec.AbstractFileSystem, mimetype: Optional[str] = None, encoding: Optional[str] = None, **kwargs):
2528
if mimetype is None or encoding is None:
2629
_mimetype, _encoding = mimetypes.guess_type(path) or "application/octet-stream"
27-
mimetype = mimetype or _mimetype or "text/plain"
30+
mimetype = mimetype or _mimetype or "application/octet-stream"
2831
encoding = encoding or _encoding or "utf-8"
2932

3033
super().__init__(path=path, fs=fs, mimetype=mimetype, encoding=encoding, **kwargs)

packages/fetchcraft-core/src/fetchcraft/connector/filesystem.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ def _getpwgid(gid: int) -> str:
6767
st = self.fs.stat(str(self.path))
6868
return {
6969
"filename": self.path.name,
70+
"mimetype": self.mimetype,
71+
"encoding": self.encoding,
7072
"source": str(self.path),
7173
"size": st["size"],
7274
"modified": st["mtime"],

packages/fetchcraft-core/src/fetchcraft/ingestion/transformations.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ async def process(
127127
file_content = base64.b64decode(record.content)
128128

129129
# Select parser based on mimetype
130+
logger.info(f"Getting parser for {file_path} -> {mimetype}")
130131
parser = self.parser_map.get(mimetype, self.parser_map.get("default", None))
131132

132133
if not parser:
@@ -143,11 +144,8 @@ async def process(
143144

144145
# Build metadata to pass to parser
145146
parser_metadata = {
146-
"source": source,
147147
"path": file_path,
148-
"mimetype": mimetype,
149-
# Include any additional metadata from the record
150-
**dict(record),
148+
**record.metadata()
151149
}
152150

153151
# Check if parser is remote (async callback-based)

0 commit comments

Comments
 (0)