Skip to content

Commit 5493558

Browse files
committed
Fix for the --require-hashes mode pip issue
Added download of the embedding model weights if needed. Limited the Markdown input files to the .md extension.
1 parent 19eb99f commit 5493558

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

byok/Containerfile.tool

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,18 @@ ARG LOG_LEVEL=info
55
ARG OUT_IMAGE_TAG=byok-image
66
ARG BYOK_TOOL_IMAGE
77
ARG UBI_BASE_IMAGE
8-
RUN dnf install -y buildah python3.11 python3.11-pip
8+
RUN dnf install -y buildah python3.11 python3.11-pip wget
99

1010
USER 0
1111
WORKDIR /workdir
1212

1313
COPY requirements.cpu.txt .
14-
RUN pip3.11 install --no-cache-dir -r requirements.cpu.txt
14+
RUN pip3.11 install --upgrade pip && pip3.11 install --no-cache-dir -r requirements.cpu.txt
1515

1616
COPY embeddings_model ./embeddings_model
17+
RUN cd embeddings_model && if [ ! -f embeddings_model/model.safetensors ]; then \
18+
wget -q https://huggingface.co/sentence-transformers/all-mpnet-base-v2/resolve/9a3225965996d404b775526de6dbfe85d3368642/model.safetensors; \
19+
fi
1720
COPY byok/generate_embeddings_tool.py byok/Containerfile.output .
1821

1922
ENV _BUILDAH_STARTED_IN_USERNS=""

byok/generate_embeddings_tool.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,11 @@ def file_metadata_func(file_path: str) -> Dict:
9292

9393
# Load documents
9494
documents = SimpleDirectoryReader(
95-
args.input_dir, recursive=True, file_metadata=file_metadata_func
95+
args.input_dir,
96+
recursive=True,
97+
required_exts=[".md"],
98+
file_extractor={".md": FlatReader()},
99+
file_metadata=file_metadata_func
96100
).load_data()
97101

98102
# Create chunks/nodes

0 commit comments

Comments
 (0)