Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 6 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,18 @@ Our documentation covers:
```markdown
open-matching-engine/
├── docs/ # Documentation files (MkDocs)
├── deploy/ # Cloud-agnostic deployment
├── scripts/ # Utility scripts for dev & testing
├── src/ # Source code
│ ├── core/ # Core framework components
│ │ ├── api/ # API endpoints
│ │ ├── domains/ # Domain implementations
│ │ ├── errors/ # Centralized error handling
│ │ ├── generation/ # Create OKH from external project
│ │ ├── llm/ # LLM service and provider abstraction layer
│ │ ├── matching/ # Matching Rules Manager
│ │ ├── models/ # Data models
│ │ ├── packaging/ # Service for building and storing OKH Packages
│ │ ├── registry/ # Domain registry
│ │ ├── services/ # Core services
│ │ ├── storage/ # Storage service for remote file mgmt
Expand Down Expand Up @@ -148,26 +154,3 @@ python run.py
```

For container deployment guides, see the [Container Guide](docs/development/container-guide.md) in our documentation.


# Our current working OKH and OKW libraries
Our current OKH and OKW libraries are implemented as publicly accessible Azure blob containers:

"Azure_Storage_ServiceName": "${AZURE_STORAGE_SERVICE_NAME}",
"Azure_Storage_OKH_ContainerName": "${AZURE_STORAGE_OKH_CONTAINER_NAME:-okh}",
"Azure_Storage_OKW_ContainerName": "${AZURE_STORAGE_OKW_CONTAINER_NAME:-okw}"

These OKHs and OKWs are taken from our repo: ${OKH_LIBRARY_REPO_URL:-https://github.com/example/library}.

Example OKW template and OKH extensions are defined here: ${OKF_SCHEMA_REPO_URL:-https://github.com/example/OKF-Schema}

We are currently working with the Internet of Production Alliance (IoPA) to unify these extensions with their official schemas.

**Configuration:**
Set the following environment variables:
- `AZURE_STORAGE_SERVICE_NAME`: Azure storage service URL
- `AZURE_STORAGE_OKH_CONTAINER_NAME`: OKH container name (default: okh)
- `AZURE_STORAGE_OKW_CONTAINER_NAME`: OKW container name (default: okw)
- `OKH_LIBRARY_REPO_URL`: Repository URL for OKH library (optional)
- `OKF_SCHEMA_REPO_URL`: Repository URL for OKF schema (optional)

15 changes: 15 additions & 0 deletions deploy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Cloud-agnostic deployment module.

This module provides abstractions for deploying the supply-graph-ai service
to various cloud providers (GCP, AWS, Azure, etc.) and container hosting services.
"""

from .base import BaseDeployer, BaseDeploymentConfig, DeploymentConfigError

# Public API of the `deploy` package: the names exposed by `from deploy import *`.
# All three are re-exported from the `.base` subpackage imported above.
__all__ = [
"BaseDeployer",
"BaseDeploymentConfig",
"DeploymentConfigError",
]

16 changes: 16 additions & 0 deletions deploy/base/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Base deployment abstraction layer.

This module provides the base classes and interfaces for cloud-agnostic deployment.
All provider-specific deployers should inherit from BaseDeployer.
"""

from .deployer import BaseDeployer
from .config import BaseDeploymentConfig, DeploymentConfigError

# Public API of the `deploy.base` package: the deployer interface from
# `.deployer` plus the configuration class and its error type from `.config`.
__all__ = [
"BaseDeployer",
"BaseDeploymentConfig",
"DeploymentConfigError",
]

150 changes: 150 additions & 0 deletions deploy/base/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""
Base deployment configuration classes.

Provides common configuration structures and validation for all deployment providers.
"""

from dataclasses import dataclass, field
from typing import Dict, Any, Optional, List
from enum import Enum


class DeploymentProvider(str, Enum):
    """Closed set of deployment targets a configuration can name.

    Because the enum also derives from ``str``, every member compares equal
    to its lowercase string value (``DeploymentProvider.GCP == "gcp"``) and
    can be looked up from that value (``DeploymentProvider("gcp")``).
    """

    # Member values are the lowercase identifiers used in configuration data.
    GCP = "gcp"
    AWS = "aws"
    AZURE = "azure"
    DIGITALOCEAN = "digitalocean"
    LOCAL = "local"


class DeploymentConfigError(Exception):
    """Signals that a deployment configuration failed validation."""


@dataclass
class ServiceConfig:
    """Provider-independent description of the service to deploy.

    ``name`` and ``image`` are mandatory; every other field has a default.
    Call :meth:`validate` to check the values.
    """

    # Identity of the service and the container image it runs.
    name: str
    image: str

    # Runtime sizing.
    port: int = 8080
    memory: str = "4Gi"  # default sized for NLP matching operations
    cpu: int = 2

    # Scaling bounds.
    min_instances: int = 1
    max_instances: int = 100

    # Request timeout in seconds (5 minutes, for long-running matching operations).
    timeout: int = 300

    # Free-form string mappings; each instance gets its own empty dict.
    environment_vars: Dict[str, str] = field(default_factory=dict)
    secrets: Dict[str, str] = field(default_factory=dict)
    labels: Dict[str, str] = field(default_factory=dict)

    def validate(self) -> None:
        """Check the field values and report the first violation found.

        Checks run in a fixed order: name, image, port, cpu, min_instances,
        max_instances, timeout. ``memory`` and the mapping fields are not
        inspected.

        Raises:
            DeploymentConfigError: Describing the first rule that is broken.
        """
        # The elif chain keeps evaluation lazy: later comparisons only run
        # when every earlier rule has passed.
        problem = None
        if not self.name:
            problem = "Service name is required"
        elif not self.image:
            problem = "Service image is required"
        elif self.port < 1 or self.port > 65535:
            problem = f"Invalid port: {self.port}"
        elif self.cpu < 1:
            problem = f"CPU must be at least 1, got {self.cpu}"
        elif self.min_instances < 0:
            problem = f"min_instances must be >= 0, got {self.min_instances}"
        elif self.max_instances < self.min_instances:
            problem = (
                f"max_instances ({self.max_instances}) must be >= min_instances ({self.min_instances})"
            )
        elif self.timeout < 1:
            problem = f"Timeout must be at least 1 second, got {self.timeout}"

        if problem is not None:
            raise DeploymentConfigError(problem)


@dataclass
class BaseDeploymentConfig:
    """Top-level deployment configuration shared by every provider.

    Combines the target provider, the environment name, an optional region,
    the common :class:`ServiceConfig`, and a free-form ``provider_config``
    mapping holding settings for the selected provider.
    """

    provider: DeploymentProvider
    environment: str = "production"
    # Region format is provider-specific, so no default is assumed here.
    region: Optional[str] = None
    service: ServiceConfig = field(default_factory=lambda: ServiceConfig(
        name="supply-graph-ai",
        image="ghcr.io/helpfulengineering/supply-graph-ai:latest",
    ))
    provider_config: Dict[str, Any] = field(default_factory=dict)

    def validate(self) -> None:
        """Validate this configuration and the nested service configuration.

        The region is deliberately not checked: its format is
        provider-specific and is left to provider-specific deployers.

        Raises:
            DeploymentConfigError: If the provider or environment is missing,
                or if the service configuration is invalid.
        """
        if not self.provider:
            raise DeploymentConfigError("Provider is required")
        if not self.environment:
            raise DeploymentConfigError("Environment is required")
        self.service.validate()

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BaseDeploymentConfig":
        """Build and validate a configuration from a plain dictionary.

        Recognised keys are ``provider`` (default ``"gcp"``, matched
        case-insensitively), ``environment``, ``region``, ``service`` and
        ``providers``. ``providers`` maps a provider name to that provider's
        settings; the entry for the selected provider becomes
        ``provider_config``.

        Args:
            data: Configuration mapping. ``service`` and ``providers`` may be
                absent or ``None``.

        Returns:
            A validated configuration instance.

        Raises:
            DeploymentConfigError: If the provider is not a supported provider
                name, or if validation of the resulting configuration fails.
        """
        provider_str = data.get("provider", "gcp")
        try:
            provider = DeploymentProvider(provider_str.lower())
        except (AttributeError, ValueError) as exc:
            # AttributeError covers non-string values (e.g. None), which have
            # no ``lower``; they are reported the same way as unknown names.
            raise DeploymentConfigError(f"Unsupported provider: {provider_str}") from exc

        # ``or {}`` tolerates an explicit null section as well as a missing one.
        service_data = data.get("service") or {}
        service = ServiceConfig(
            name=service_data.get("name", "supply-graph-ai"),
            image=service_data.get("image", "ghcr.io/helpfulengineering/supply-graph-ai:latest"),
            port=service_data.get("port", 8080),
            memory=service_data.get("memory", "4Gi"),
            cpu=service_data.get("cpu", 2),
            min_instances=service_data.get("min_instances", 1),
            max_instances=service_data.get("max_instances", 100),
            timeout=service_data.get("timeout", 300),
            environment_vars=service_data.get("environment_vars", {}),
            secrets=service_data.get("secrets", {}),
            labels=service_data.get("labels", {}),
        )

        # The provider name is matched case-insensitively above, so look its
        # settings up under the spelling used in ``data`` first and then under
        # the canonical lowercase value (the key that ``to_dict`` writes).
        providers = data.get("providers") or {}
        provider_config = providers.get(provider_str)
        if provider_config is None:
            provider_config = providers.get(provider.value) or {}

        # No default region: it must come from the data or be supplied by
        # provider-specific configuration.
        config = cls(
            provider=provider,
            environment=data.get("environment", "production"),
            region=data.get("region"),
            service=service,
            provider_config=provider_config,
        )

        config.validate()
        return config

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the configuration to a dictionary.

        The result uses the same layout that :meth:`from_dict` reads, with
        ``provider_config`` stored under ``providers[<provider value>]``.
        Mapping fields are referenced, not copied.

        Returns:
            Dictionary representation of this configuration.
        """
        return {
            "provider": self.provider.value,
            "environment": self.environment,
            "region": self.region,
            "service": {
                "name": self.service.name,
                "image": self.service.image,
                "port": self.service.port,
                "memory": self.service.memory,
                "cpu": self.service.cpu,
                "min_instances": self.service.min_instances,
                "max_instances": self.service.max_instances,
                "timeout": self.service.timeout,
                "environment_vars": self.service.environment_vars,
                "secrets": self.service.secrets,
                "labels": self.service.labels,
            },
            "providers": {
                self.provider.value: self.provider_config,
            },
        }

125 changes: 125 additions & 0 deletions deploy/base/deployer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""
Base deployer interface for cloud-agnostic deployment.

All provider-specific deployers must implement this interface.
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, Optional
import logging

from .config import BaseDeploymentConfig

logger = logging.getLogger(__name__)


class BaseDeployer(ABC):
    """Abstract interface every cloud-provider deployer implements.

    Subclasses supply the provider-specific behaviour for the abstract
    methods; this base class stores and validates the configuration.
    """

    def __init__(self, config: BaseDeploymentConfig):
        """
        Store the configuration, validate it, and log the initialization.

        Args:
            config: Deployment configuration for this deployer.

        Raises:
            DeploymentConfigError: If the configuration is invalid.
        """
        self.config = config
        self.config.validate()
        logger.info(f"Initialized {self.__class__.__name__} for provider {config.provider.value}")

    @abstractmethod
    def setup(self) -> None:
        """
        Prepare the cloud resources the service needs (IAM, storage, etc.).

        Implementations should be idempotent: repeated calls must not fail
        when the resources already exist.

        Raises:
            DeploymentError: If setup fails
        """

    @abstractmethod
    def deploy(self) -> str:
        """
        Deploy the service.

        Returns:
            URL of the deployed service (e.g., https://service.example.com)

        Raises:
            DeploymentError: If deployment fails
        """

    @abstractmethod
    def get_service_url(self, service_name: Optional[str] = None) -> str:
        """
        Look up the URL of a deployed service.

        Args:
            service_name: Service to look up; when omitted, implementations
                default to config.service.name

        Returns:
            Service URL

        Raises:
            DeploymentError: If service not found
        """

    @abstractmethod
    def update(self) -> str:
        """
        Apply the current configuration to an existing deployment.

        Returns:
            Service URL

        Raises:
            DeploymentError: If update fails
        """

    @abstractmethod
    def delete(self, service_name: Optional[str] = None) -> None:
        """
        Remove a deployment.

        Args:
            service_name: Service to delete; when omitted, implementations
                default to config.service.name

        Raises:
            DeploymentError: If deletion fails
        """

    @abstractmethod
    def get_status(self, service_name: Optional[str] = None) -> Dict[str, Any]:
        """
        Report the state of a deployment.

        Args:
            service_name: Service to inspect; when omitted, implementations
                default to config.service.name

        Returns:
            Dictionary with status information (e.g., {"status": "running", "url": "..."})

        Raises:
            DeploymentError: If status check fails
        """

    def validate_config(self) -> None:
        """
        Re-run validation of the stored configuration.

        Validation already happens once in ``__init__``; this method lets
        callers repeat the check on demand, for example before deploying.

        Raises:
            DeploymentConfigError: If configuration is invalid
        """
        self.config.validate()

Loading
Loading