Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ sbom_*.json
*_requirements.txt
pylock.*
snyk
.vscode

# We do not want library lock files b/c they're not
# actually used by any applications (and they will)
Expand All @@ -226,3 +227,14 @@ snyk
# common dep, but the actual installed version for the service
# is reflected in the service's own uv.lock, which IS checked in.
libraries/*/uv.lock

# gen3_inference
knowledge/
*.sqlite3
chroma/
cache/
tests/prof/
prof/
bin/library
tmp/
*.bak
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,23 @@ See [docs/ai_api.yaml](docs/ai_api.yaml) for the OpenAPI specification. You can
* Simplified setup, building, running
* `just install`, `just run gen3_embeddings`, `just build`

> Alternative: use a base service instead of library. Justification for library: explicitly labeling the common code as a library (e.g. not meant to be run by itself) provides more clarity on intended use.

Services can import common code from libraries:
Services can import common code:

```python
from libraries.common import TEST
from common.config import DEBUG
```

Services have folder structure:
Services (and libraries) have the following folder structure:

* `src/{{name}}`
* `pyproject.toml` which builds {{name}} from src/{{name}}

> Alternative: all services could share a common `gen3` (or similar) package. Justification to not sharing a common package: the `src` setup is a more explicit separation of concerns and discourages cross-service importing without using the common library.
### Why this setup?

- General benefits of a monorepo (common patterns for maintaining code in a single repo)
- Per-service uv environments ensure minimal required dependencies for each
- Common library project allows cross-service code and dependencies to be
maintained in one place

## Quickstart

Expand Down
67 changes: 35 additions & 32 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ setup:
curl -LsSf https://astral.sh/uv/install.sh | sh
fi

print_header "just setup:" "verifying" "postgres" "installation..."
print_header "just setup:" "verifying" "PostgreSQL client (psql)" "installation..."
if command -v psql >/dev/null 2>&1; then
echo "PostgreSQL client (psql) is installed."
echo " version: $(psql --version)"
Expand All @@ -28,28 +28,32 @@ setup:

for dir in services/*; do
if [[ -n "${dir#services/}" ]]; then
print_header "just setup:" "setting up postgres db for" "${dir#services/}" "service..."

if [ ! -f "${dir}/.env" ]; then
echo "${RED}** WARNING: .env file not found in "${dir}". Will rely on environment variables. **${RESET}"
if [ "${dir#services/}" == "gen3_inference" ]; then
print_header "just setup:" "No PostgreSQL db needed for" "${dir#services/}" "service. Nothing to do."
else
echo "Found .env file. Using it to set up database."
set -a
source "${dir}/.env"
set +a
fi

if [[ -z ${PGDATABASE:-} ]]; then
echo "PGDATABASE not set, using ${dir#services/}..."
export PGDATABASE="${dir#services/}"
print_header "just setup:" "setting up PostgreSQL db for" "${dir#services/}" "service..."

# Each service is handled in a subshell so that variables loaded from one
# service's .env (or the PGDATABASE fallback exported below) cannot leak into
# the next loop iteration and make it target the wrong database.
(
  if [[ ! -f "${dir}/.env" ]]; then
    echo "${RED}** WARNING: .env file not found in ${dir}. Will rely on environment variables. **${RESET}"
  else
    echo "Found .env file. Using it to set up database."
    # set -a exports everything the .env assigns so psql can see it.
    set -a
    source "${dir}/.env"
    set +a
  fi

  if [[ -z "${PGDATABASE:-}" ]]; then
    echo "PGDATABASE not set, using ${dir#services/}..."
    export PGDATABASE="${dir#services/}"
  fi

  # psql/libpq uses $PGDATABASE as the database to CONNECT to. The target
  # database does not exist yet, so connect to a maintenance database
  # explicitly (override with PG_MAINTENANCE_DB if "postgres" is unavailable).
  maintenance_db="${PG_MAINTENANCE_DB:-postgres}"
  pg_conn=(-h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${maintenance_db}")

  # Ask the catalog whether the database exists instead of treating every
  # psql failure (bad host, bad credentials, ...) as "already exists".
  db_exists="$(
    psql "${pg_conn[@]}" -tAc \
      "SELECT 1 FROM pg_database WHERE datname = '${PGDATABASE}'" \
      2>/dev/null || true
  )"

  if [[ "${db_exists}" == "1" ]]; then
    echo "Database already exists."
  elif ! psql "${pg_conn[@]}" \
    -c "CREATE DATABASE \"${PGDATABASE}\" WITH OWNER \"${PGUSER}\";"; then
    # Best effort, as before: report the failure but keep processing the
    # remaining services. psql's own error output is no longer hidden.
    echo "${RED}** ERROR: could not create database ${PGDATABASE}. See psql output above. **${RESET}" >&2
  fi

  # TODO: db migration / initial setup
)
fi

psql \
-h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" \
-c "CREATE DATABASE \"${PGDATABASE}\" WITH OWNER \"${PGUSER}\";" \
2>/dev/null || echo "Database already exists."

# TODO: db migration / initial setup
fi
done

Expand All @@ -68,7 +72,14 @@ install $SERVICE="all":
# print_header COMMAND TEXT SERVICE TEXT
print_header "just install:" "installing" "$SERVICE" "service..."

uv sync --all-packages --group dev --directory "./services/$SERVICE"
echo "Installing common library into service: ${SERVICE}..."
cd "./services/$SERVICE"
uv add "common @ file://../../libraries/common"
cd -

echo
echo "uv sync-ing $SERVICE service with --group dev and --all-extras..."
uv sync --all-packages --group dev --directory "./services/$SERVICE" --all-extras
fi

lock $SERVICE="all":
Expand All @@ -94,7 +105,7 @@ test $SERVICE="all":
else
print_header "just test:" "testing" "$SERVICE" "service..."
cd "./services/$SERVICE"
uv run pytest .
uv run pytest . -vv
exit_code=$?
cd -

Expand Down Expand Up @@ -129,7 +140,7 @@ build $SERVICE="all":
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=1

# Start the app with OpenTelemetry and Gunicorn and Uvicorn workers
uv run --directory "./services/$SERVICE" \
uv run --env-file "../../.env" --directory "./services/$SERVICE" \
opentelemetry-instrument \
gunicorn \
$SERVICE.main:app_instance \
Expand Down Expand Up @@ -236,14 +247,6 @@ lint $SERVICE="all":
overall_exit=$((overall_exit | $exit_code))
echo

print_header "just lint:" "deptry" "$SERVICE" "..."
uv run --directory "./services/$SERVICE" deptry ./src

exit_code=$?
report_error_if_failed $exit_code "just lint:" "deptry" "$SERVICE" "service!"
overall_exit=$((overall_exit | $exit_code))
echo

report_error_or_success $overall_exit "just lint:" "linting" "$SERVICE" "service!"

exit $overall_exit
Expand Down
6 changes: 4 additions & 2 deletions libraries/common/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ dependencies = [
"opentelemetry-sdk>=1.39.1",
"prometheus-client>=0.24.1",
"pytest>=9.0.2",
"pyyaml>=6.0.3",
"requests>=2.32.5",
"ruff>=0.15.6",
"starlette>=0.52.1",
"uvicorn>=0.41.0",
]
Expand All @@ -29,7 +32,6 @@ dev = [
"pytest-xdist",
"pyright",
"pylint>=4.0.5",
"deptry>=0.24.0",
]

[tool.uv]
Expand All @@ -40,4 +42,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["common"]
packages = ["src/common"]
File renamed without changes.
73 changes: 73 additions & 0 deletions libraries/common/src/common/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import sys
from pathlib import Path

import cdislogging
from starlette.config import Config

def get_venv_root() -> Path | None:
"""
Return the absolute Path to the root of the current virtual environment,
or None if the interpreter is running from the system Python.
"""
if hasattr(sys, 'base_prefix'):
if sys.prefix != sys.base_prefix:
return Path(sys.prefix).parent

return None

# Resolve which configuration file to load.
#
# NOTE: Default config only works when:
# The .env is in its standard location:
# /services/{service_name}/.env
# AND the common library is installed in a virtualenv for the service
# AND the virtualenv directory is in:
# /services/{service_name}/{venv_name}
#
# CURRENT_DIR is the value returned by get_venv_root() (a Path, the parent of
# sys.prefix) when that is not None; otherwise it falls back to the directory
# containing this file (a str).
CURRENT_DIR = get_venv_root() or os.path.dirname(os.path.realpath(__file__))
# The CONFIG_PATH environment variable, when set, overrides the default
# "{CURRENT_DIR}/.env"; either way the result is made absolute.
CONFIG_PATH = os.path.abspath(os.getenv("CONFIG_PATH", f"{CURRENT_DIR}/.env"))

# All settings below are read through this starlette Config instance.
starlette_config = Config(CONFIG_PATH)
DEBUG = starlette_config("DEBUG", cast=bool, default=False)

# this turns on debug logging for certain noisy internal libraries
# Note: the list of libraries is in the gunicorn.conf.py
VERBOSE_INTERNAL_LOGS = starlette_config("VERBOSE_INTERNAL_LOGS", cast=bool, default=False)

# NOTE: `logging` here is a cdislogging logger instance, not the stdlib
# `logging` module. Its level is "debug" when DEBUG is true, else "info".
logging = cdislogging.get_logger(__name__, log_level="debug" if DEBUG else "info")

# Emitted at import time so the chosen config file shows up in startup logs.
logging.info(f"Using configuration file: {CONFIG_PATH}")

# --- Authorization overrides (both default to False) -----------------------
#
# DEBUG_SKIP_AUTH: authorization is skipped only when the request carries no
# token; a request that does provide a token is still authorized.
DEBUG_SKIP_AUTH = starlette_config("DEBUG_SKIP_AUTH", default=False, cast=bool)

# ALLOW_ANONYMOUS_ACCESS: effectively turns authorization checking off, so
# anyone can use the AI functionality.
ALLOW_ANONYMOUS_ACCESS = starlette_config("ALLOW_ANONYMOUS_ACCESS", default=False, cast=bool)

# Report the effective debug settings once, at import time.
logging.info(f"DEBUG is {DEBUG}")
logging.info(f"VERBOSE_INTERNAL_LOGS is {VERBOSE_INTERNAL_LOGS}")

# Make any relaxed-auth setting loud in the startup logs.
if DEBUG_SKIP_AUTH:
    logging.warning(
        f"DEBUG_SKIP_AUTH is {DEBUG_SKIP_AUTH}. Authorization will be SKIPPED if no token is provided. "
        "FOR NON-PRODUCTION USE ONLY!! USE WITH CAUTION!!"
    )

if ALLOW_ANONYMOUS_ACCESS:
    logging.warning(
        f"ALLOW_ANONYMOUS_ACCESS is {ALLOW_ANONYMOUS_ACCESS}. Authorization will be SKIPPED. "
        "ENSURE THIS IS ACCEPTABLE!!"
    )

# Location of the policy engine service, Arborist.
# Defaults to the default service name in the k8s magic DNS setup.
ARBORIST_URL = starlette_config("ARBORIST_URL", default="http://arborist-service", cast=str)

# NOTE(review): presumably the routes reachable without authorization;
# confirm against the code that consumes PUBLIC_ROUTES.
PUBLIC_ROUTES = {"/", "/_status", "/_status/", "/_version", "/_version/"}
# The metrics endpoints plus every public route.
ENDPOINTS_WITHOUT_METRICS = {"/metrics", "/metrics/"} | PUBLIC_ROUTES

# This app exports traces using OpenTelemetry. By default in Gen3, we use Alloy for collection.
ENABLE_OPENTELEMETRY_TRACES = starlette_config("ENABLE_OPENTELEMETRY_TRACES", cast=bool, default=True)
# For local development, set this to an EMPTY STRING and it will output to console. See gunicorn.conf.py
# The default is host "alloy.monitoring" on port 4318 (the OTLP/HTTP port);
# the port must be separated from the host with ":" to form a valid URL.
OTEL_EXPORTER_OTLP_ENDPOINT = starlette_config(
    "OTEL_EXPORTER_OTLP_ENDPOINT", default="http://alloy.monitoring:4318", cast=str
)

# Timeout for the async HTTP client; cast to float. NOTE(review): the unit is
# presumably seconds; confirm where the client is constructed.
ASYNC_HTTP_CLIENT_TIMEOUT = starlette_config("ASYNC_HTTP_CLIENT_TIMEOUT", cast=float, default=30)
41 changes: 7 additions & 34 deletions services/gen3_ai_model_repo/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
name = "gen3_ai_model_repo"
version = "0.1.0"
description = "Gen3 AI Model Repo"
dependencies = ["common", "asyncpg"]
dependencies = [
"common",
"asyncpg",
]
requires-python = "~=3.13.0"

[dependency-groups]
Expand All @@ -11,42 +14,12 @@ dev = []
[tool.uv]
package = true

[tool.uv.workspace]
members = [
"../../libraries/*",
]

[tool.uv.sources]
common = { workspace = true }

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.uv.sources]
common = { path = "../../libraries/common" }

[tool.hatch.build.targets.wheel]
packages = ["src/gen3_ai_model_repo"]

[tool.deptry.per_rule_ignores]
DEP002 = ["common"]

# anything in libraries/common we need to add here
# b/c deptry will see it as a transitive dependency but
# we actually WANT it to NOT be in both the service and
# libraries/common. So this is intentional / by design
# for shared libraries.
DEP003 = [
"authutils",
"asyncpg",
"cdislogging",
"click",
"fastapi",
"gen3authz",
"gunicorn",
"opentelemetry-api",
"opentelemetry-exporter-otlp",
"opentelemetry-instrumentation-fastapi",
"opentelemetry-sdk",
"prometheus-client",
"starlette",
"uvicorn",
]
Loading
Loading