Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions .github/workflows/ci-pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# CI pipeline: lint -> build (single image artifact) -> matrix tests against
# four vector-DB backends -> release (push to Quay on main / v* tags).
# The image is built exactly once and handed to later jobs as a tar artifact,
# so the bits that get tested are the bits that get released.
name: CI Pipeline

on:
  pull_request:
  push:
    branches: [main]
    tags:
      - "v*"

jobs:
  # Static checks: formatting (black/isort) and linting (ruff).
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: pip install black isort ruff
      - run: black --check .
      - run: isort --check-only .
      - run: ruff check .

  build:
    runs-on: ubuntu-latest
    needs: lint
    outputs:
      # Short-SHA tag consumed by the test and release jobs.
      image_tag: ${{ steps.meta.outputs.sha_tag }}
    steps:
      - uses: actions/checkout@v4

      - name: Generate tag
        id: meta
        run: echo "sha_tag=sha-${GITHUB_SHA::7}" >> $GITHUB_OUTPUT

      - name: Build Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Containerfile
          # load (not push): keep the image in the local Docker daemon so it
          # can be saved to a tar artifact below.
          load: true
          tags: test-image:${{ steps.meta.outputs.sha_tag }}

      - name: Save image as artifact
        run: docker save test-image:${{ steps.meta.outputs.sha_tag }} -o image.tar

      - name: Upload image artifact
        uses: actions/upload-artifact@v4
        with:
          name: test-image
          path: image.tar

  # Run the embed job once per supported vector-database backend.
  test:
    needs: [lint, build]
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        db: [pgvector, redis, elastic, qdrant]

    steps:
      - uses: actions/checkout@v4

      - name: Download image artifact
        uses: actions/download-artifact@v4
        with:
          name: test-image
          path: .

      - name: Load Docker image
        run: docker load -i image.tar

      - name: Start PGVector
        if: matrix.db == 'pgvector'
        run: |
          docker run -d --name pgvector-test \
            -e POSTGRES_USER=user \
            -e POSTGRES_PASSWORD=pass \
            -e POSTGRES_DB=mydb \
            -p 5432:5432 \
            ankane/pgvector

      - name: Start Redis
        if: matrix.db == 'redis'
        run: |
          docker run -d --name redis-test \
            -p 6379:6379 \
            redis/redis-stack-server:6.2.6-v19

      - name: Start Elasticsearch
        if: matrix.db == 'elastic'
        run: |
          docker run -d --name es-test \
            -e "discovery.type=single-node" \
            -e "xpack.security.enabled=true" \
            -e "ELASTIC_PASSWORD=changeme" \
            -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
            -p 9200:9200 \
            elasticsearch:8.11.1

      - name: Start Qdrant
        if: matrix.db == 'qdrant'
        run: |
          docker run -d --name qdrant-test \
            -p 6333:6333 \
            qdrant/qdrant

      # Poll each backend's own health check instead of a fixed `sleep 30`:
      # a flat sleep is flaky (Elasticsearch 8.x with security enabled can
      # take longer than 30s to boot) and slow (Redis/Qdrant are up in a
      # couple of seconds). Bounded at ~120s; dumps container state on timeout.
      # The readiness probe runs inside an `if` so a failing attempt does not
      # trip the runner's default `bash -e`.
      - name: Wait for DB to start
        run: |
          ready() {
            case "${{ matrix.db }}" in
              pgvector) docker exec pgvector-test pg_isready -U user -d mydb ;;
              redis)    docker exec redis-test redis-cli ping | grep -q PONG ;;
              elastic)  curl -fsS -u elastic:changeme http://localhost:9200/_cluster/health > /dev/null ;;
              qdrant)   curl -fsS http://localhost:6333/readyz > /dev/null ;;
            esac
          }
          for _ in $(seq 1 60); do
            if ready; then
              echo "${{ matrix.db }} is ready"
              exit 0
            fi
            sleep 2
          done
          echo "${{ matrix.db }} did not become ready in time" >&2
          docker ps -a
          exit 1

      - name: Run embed job
        run: |
          docker run --rm --network host \
            -e LOG_LEVEL=debug \
            -e DB_TYPE=${{ matrix.db }} \
            test-image:${{ needs.build.outputs.image_tag }}

  # Push the tested image to Quay. Runs only for pushes to main or v* tags
  # (on pull_request events github.ref is refs/pull/..., so this is skipped).
  release:
    if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')
    runs-on: ubuntu-latest
    needs: [lint, build, test]
    steps:
      - uses: actions/checkout@v4

      - name: Log in to Quay.io
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}

      - name: Download image artifact
        uses: actions/download-artifact@v4
        with:
          name: test-image
          path: .

      - name: Load Docker image
        run: docker load -i image.tar

      # Always publish the immutable sha-<short> tag; additionally publish the
      # version tag for tag builds, or `latest` for builds on main.
      - name: Tag and push image
        run: |
          docker tag test-image:${{ needs.build.outputs.image_tag }} quay.io/dminnear/vector-embedder:${{ needs.build.outputs.image_tag }}

          if [[ $GITHUB_REF == refs/tags/* ]]; then
            docker tag test-image:${{ needs.build.outputs.image_tag }} quay.io/dminnear/vector-embedder:${GITHUB_REF#refs/tags/}
            docker push quay.io/dminnear/vector-embedder:${GITHUB_REF#refs/tags/}
          elif [[ $GITHUB_REF == refs/heads/main ]]; then
            docker tag test-image:${{ needs.build.outputs.image_tag }} quay.io/dminnear/vector-embedder:latest
            docker push quay.io/dminnear/vector-embedder:latest
          fi

          docker push quay.io/dminnear/vector-embedder:${{ needs.build.outputs.image_tag }}
28 changes: 0 additions & 28 deletions .github/workflows/lint.yaml

This file was deleted.

51 changes: 0 additions & 51 deletions .github/workflows/push-to-quay.yaml

This file was deleted.

42 changes: 24 additions & 18 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
import os
from dataclasses import dataclass
from typing import Dict, List
Expand Down Expand Up @@ -97,13 +98,28 @@ def load() -> "Config":
load_dotenv()
get = Config._get_required_env_var

# Initialize logger
log_level_name = get("LOG_LEVEL").lower()
log_levels = {
"debug": 10,
"info": 20,
"warning": 30,
"error": 40,
"critical": 50,
}
if log_level_name not in log_levels:
raise ValueError(
f"Invalid LOG_LEVEL: '{log_level_name}'. Must be one of: {', '.join(log_levels)}"
)
log_level = log_levels[log_level_name]
logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)
logger.debug("Logging initialized at level: %s", log_level_name.upper())

# Initialize db
db_type = get("DB_TYPE")
db_provider = Config._init_db_provider(db_type)

chunk_size = int(get("CHUNK_SIZE"))
chunk_overlap = int(get("CHUNK_OVERLAP"))
temp_dir = get("TEMP_DIR")

# Web URLs
web_sources_raw = get("WEB_SOURCES")
try:
Expand All @@ -118,20 +134,10 @@ def load() -> "Config":
except json.JSONDecodeError as e:
raise ValueError(f"Invalid REPO_SOURCES JSON: {e}") from e

# Logging
log_level_name = get("LOG_LEVEL").lower()
log_levels = {
"debug": 10,
"info": 20,
"warning": 30,
"error": 40,
"critical": 50,
}
if log_level_name not in log_levels:
raise ValueError(
f"Invalid LOG_LEVEL: '{log_level_name}'. Must be one of: {', '.join(log_levels)}"
)
log_level = log_levels[log_level_name]
# Misc
chunk_size = int(get("CHUNK_SIZE"))
chunk_overlap = int(get("CHUNK_OVERLAP"))
temp_dir = get("TEMP_DIR")

return Config(
db_provider=db_provider,
Expand Down
Loading