Skip to content

Commit 540a2cf

Browse files
authored
Merge pull request #58 from helpfulengineering/multicloud
Multicloud
2 parents 0208d01 + 5f68288 commit 540a2cf

23 files changed

+3985
-92
lines changed

README.md

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,18 @@ Our documentation covers:
9797
```markdown
9898
open-matching-engine/
9999
├── docs/ # Documentation files (MkDocs)
100+
├── deploy/ # Cloud-agnostic deployment
101+
├── scripts/ # Utility scripts for dev & testing
100102
├── src/ # Source code
101103
│ ├── core/ # Core framework components
102104
│ │ ├── api/ # API endpoints
103105
│ │ ├── domains/ # Domain implementations
106+
│ │ ├── errors/ # Centralized error handling
107+
│ │ ├── generation/ # Create OKH from external project
108+
│ │ ├── llm/ # LLM service and provider abstraction layer
104109
│ │ ├── matching/ # Matching Rules Manager
105110
│ │ ├── models/ # Data models
111+
│ │ ├── packaging/ # Service for building and storing OKH Packages
106112
│ │ ├── registry/ # Domain registry
107113
│ │ ├── services/ # Core services
108114
│ │ ├── storage/ # Storage service for remote file mgmt
@@ -148,26 +154,3 @@ python run.py
148154
```
149155

150156
For container deployment guides, see the [Container Guide](docs/development/container-guide.md) in our documentation.
151-
152-
153-
# Our current working OKH and OKW libraries
154-
Our current OKH and OKW libraries are implemented as publicly accessible Azure blob containers:
155-
156-
"Azure_Storage_ServiceName": "${AZURE_STORAGE_SERVICE_NAME}",
157-
"Azure_Storage_OKH_ContainerName": "${AZURE_STORAGE_OKH_CONTAINER_NAME:-okh}",
158-
"Azure_Storage_OKW_ContainerName": "${AZURE_STORAGE_OKW_CONTAINER_NAME:-okw}"
159-
160-
These OKHs and OKWs are taken from our repo: ${OKH_LIBRARY_REPO_URL:-https://github.com/example/library}.
161-
162-
Example OKW template and OKH extensions are defined here: ${OKF_SCHEMA_REPO_URL:-https://github.com/example/OKF-Schema}
163-
164-
We are currently working with the Internet of Production Alliance (IoPA) to unify these extensions with their official schemas.
165-
166-
**Configuration:**
167-
Set the following environment variables:
168-
- `AZURE_STORAGE_SERVICE_NAME`: Azure storage service URL
169-
- `AZURE_STORAGE_OKH_CONTAINER_NAME`: OKH container name (default: okh)
170-
- `AZURE_STORAGE_OKW_CONTAINER_NAME`: OKW container name (default: okw)
171-
- `OKH_LIBRARY_REPO_URL`: Repository URL for OKH library (optional)
172-
- `OKF_SCHEMA_REPO_URL`: Repository URL for OKF schema (optional)
173-

deploy/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""
2+
Cloud-agnostic deployment module.
3+
4+
This module provides abstractions for deploying the supply-graph-ai service
5+
to various cloud providers (GCP, AWS, Azure, etc.) and container hosting services.
6+
"""
7+
8+
from .base import BaseDeployer, BaseDeploymentConfig, DeploymentConfigError
9+
10+
__all__ = [
11+
"BaseDeployer",
12+
"BaseDeploymentConfig",
13+
"DeploymentConfigError",
14+
]
15+

deploy/base/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""
2+
Base deployment abstraction layer.
3+
4+
This module provides the base classes and interfaces for cloud-agnostic deployment.
5+
All provider-specific deployers should inherit from BaseDeployer.
6+
"""
7+
8+
from .deployer import BaseDeployer
9+
from .config import BaseDeploymentConfig, DeploymentConfigError
10+
11+
__all__ = [
12+
"BaseDeployer",
13+
"BaseDeploymentConfig",
14+
"DeploymentConfigError",
15+
]
16+

deploy/base/config.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
"""
2+
Base deployment configuration classes.
3+
4+
Provides common configuration structures and validation for all deployment providers.
5+
"""
6+
7+
from dataclasses import dataclass, field
8+
from typing import Dict, Any, Optional, List
9+
from enum import Enum
10+
11+
12+
class DeploymentProvider(str, Enum):
    """Closed set of deployment targets a configuration may name.

    Members also subclass ``str``, so each one compares equal to its
    lowercase identifier (``DeploymentProvider.GCP == "gcp"``) and can be
    looked up from that identifier with ``DeploymentProvider("gcp")``.
    """

    GCP = "gcp"
    AWS = "aws"
    AZURE = "azure"
    DIGITALOCEAN = "digitalocean"
    LOCAL = "local"
20+
21+
22+
class DeploymentConfigError(Exception):
    """Raised when deployment configuration is invalid.

    Used by the configuration classes in this module for missing required
    values, out-of-range settings, and unsupported provider names.
    """
26+
27+
28+
@dataclass
class ServiceConfig:
    """Common service configuration shared across all providers.

    ``name`` and ``image`` are required; every other field has a default.
    The three mapping fields use ``default_factory`` so that instances never
    share a dictionary.
    """

    name: str
    image: str
    port: int = 8080
    memory: str = "4Gi"  # Required for NLP matching operations
    cpu: int = 2
    min_instances: int = 1
    max_instances: int = 100
    timeout: int = 300  # 5 minutes for long-running matching operations
    environment_vars: Dict[str, str] = field(default_factory=dict)
    secrets: Dict[str, str] = field(default_factory=dict)
    labels: Dict[str, str] = field(default_factory=dict)

    def validate(self) -> None:
        """Validate service configuration.

        Raises:
            DeploymentConfigError: If ``name`` or ``image`` is empty, if a
                numeric setting is not a number, or if a numeric setting is
                outside its allowed range.
        """
        if not self.name:
            raise DeploymentConfigError("Service name is required")
        if not self.image:
            raise DeploymentConfigError("Service image is required")

        # The range checks below would raise a bare TypeError for a
        # non-numeric value (for example a quoted "8080"); report that as a
        # configuration error so callers only need to handle one exception.
        for attr in ("port", "cpu", "min_instances", "max_instances", "timeout"):
            value = getattr(self, attr)
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise DeploymentConfigError(
                    f"{attr} must be a number, got {value!r}"
                )

        if not 1 <= self.port <= 65535:
            raise DeploymentConfigError(f"Invalid port: {self.port}")
        if self.cpu < 1:
            raise DeploymentConfigError(f"CPU must be at least 1, got {self.cpu}")
        if self.min_instances < 0:
            raise DeploymentConfigError(
                f"min_instances must be >= 0, got {self.min_instances}"
            )
        if self.max_instances < self.min_instances:
            raise DeploymentConfigError(
                f"max_instances ({self.max_instances}) must be >= "
                f"min_instances ({self.min_instances})"
            )
        if self.timeout < 1:
            raise DeploymentConfigError(
                f"Timeout must be at least 1 second, got {self.timeout}"
            )
64+
65+
66+
@dataclass
class BaseDeploymentConfig:
    """Base deployment configuration for all providers.

    Holds the target provider, environment name, optional region, the
    common ``ServiceConfig`` and a free-form ``provider_config`` mapping for
    provider-specific settings.
    """

    provider: DeploymentProvider
    environment: str = "production"
    region: Optional[str] = None  # Provider-specific format, no default
    service: ServiceConfig = field(
        default_factory=lambda: ServiceConfig(
            name="supply-graph-ai",
            image="ghcr.io/helpfulengineering/supply-graph-ai:latest",
        )
    )
    provider_config: Dict[str, Any] = field(default_factory=dict)

    def validate(self) -> None:
        """Validate deployment configuration.

        Region format is provider-specific, so it is not checked here;
        provider-specific deployers are expected to validate it.

        Raises:
            DeploymentConfigError: If the provider or environment is missing,
                or if the nested service configuration is invalid.
        """
        if not self.provider:
            raise DeploymentConfigError("Provider is required")
        if not self.environment:
            raise DeploymentConfigError("Environment is required")
        self.service.validate()

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BaseDeploymentConfig":
        """Create a validated configuration from a dictionary.

        Args:
            data: Mapping with optional keys ``provider`` (defaults to
                ``"gcp"``, matched case-insensitively), ``environment``,
                ``region``, ``service`` and ``providers``. ``providers`` maps
                a provider name to that provider's settings.

        Returns:
            The configuration, already passed through ``validate()``.

        Raises:
            DeploymentConfigError: If the provider is unsupported or the
                resulting configuration is invalid.
        """
        provider_str = data.get("provider", "gcp")
        # A non-string provider (None, a number, ...) has no .lower(); report
        # it as a configuration error rather than an AttributeError.
        if not isinstance(provider_str, str):
            raise DeploymentConfigError(f"Unsupported provider: {provider_str}")
        try:
            provider = DeploymentProvider(provider_str.lower())
        except ValueError as err:
            raise DeploymentConfigError(
                f"Unsupported provider: {provider_str}"
            ) from err

        # "or {}" also covers keys that are present but explicitly None,
        # which is how an empty mapping key is typically parsed.
        service_data = data.get("service") or {}
        service = ServiceConfig(
            name=service_data.get("name", "supply-graph-ai"),
            image=service_data.get(
                "image", "ghcr.io/helpfulengineering/supply-graph-ai:latest"
            ),
            port=service_data.get("port", 8080),
            memory=service_data.get("memory", "4Gi"),
            cpu=service_data.get("cpu", 2),
            min_instances=service_data.get("min_instances", 1),
            max_instances=service_data.get("max_instances", 100),
            timeout=service_data.get("timeout", 300),
            # Shallow copies keep the config from aliasing the caller's dicts.
            environment_vars=dict(service_data.get("environment_vars") or {}),
            secrets=dict(service_data.get("secrets") or {}),
            labels=dict(service_data.get("labels") or {}),
        )

        # The provider name is matched case-insensitively above, so the
        # provider section must be too: try the key exactly as given, then
        # the canonical lowercase value that to_dict() writes.
        providers = data.get("providers") or {}
        provider_section = providers.get(provider_str)
        if provider_section is None:
            provider_section = providers.get(provider.value)

        # Region should be provided in config or set by provider-specific
        # config; there is no default, to avoid provider-specific assumptions.
        config = cls(
            provider=provider,
            environment=data.get("environment", "production"),
            region=data.get("region"),
            service=service,
            provider_config=dict(provider_section or {}),
        )

        config.validate()
        return config

    def to_dict(self) -> Dict[str, Any]:
        """Convert configuration to a dictionary accepted by ``from_dict``.

        The mapping fields are shallow-copied so that mutating the returned
        dictionary's top-level mappings does not modify this configuration.
        """
        service = self.service
        return {
            "provider": self.provider.value,
            "environment": self.environment,
            "region": self.region,
            "service": {
                "name": service.name,
                "image": service.image,
                "port": service.port,
                "memory": service.memory,
                "cpu": service.cpu,
                "min_instances": service.min_instances,
                "max_instances": service.max_instances,
                "timeout": service.timeout,
                "environment_vars": dict(service.environment_vars),
                "secrets": dict(service.secrets),
                "labels": dict(service.labels),
            },
            "providers": {
                self.provider.value: dict(self.provider_config),
            },
        }
150+

deploy/base/deployer.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""
2+
Base deployer interface for cloud-agnostic deployment.
3+
4+
All provider-specific deployers must implement this interface.
5+
"""
6+
7+
from abc import ABC, abstractmethod
8+
from typing import Dict, Any, Optional
9+
import logging
10+
11+
from .config import BaseDeploymentConfig
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
class BaseDeployer(ABC):
    """Base class for cloud provider deployers.

    Subclasses implement the abstract lifecycle methods (``setup``,
    ``deploy``, ``get_service_url``, ``update``, ``delete``, ``get_status``)
    for one provider.

    NOTE(review): the method contracts below name ``DeploymentError``, but
    that exception is neither defined nor imported in this module - confirm
    where it is declared.
    """

    def __init__(self, config: BaseDeploymentConfig):
        """
        Initialize deployer with configuration.

        The configuration is validated immediately via ``config.validate()``,
        so an invalid configuration fails at construction time.

        Args:
            config: Deployment configuration

        Raises:
            DeploymentConfigError: If configuration is invalid
        """
        self.config = config
        self.config.validate()
        # Lazy %-style arguments: the message is only formatted when the
        # INFO level is actually enabled.
        logger.info(
            "Initialized %s for provider %s",
            self.__class__.__name__,
            config.provider.value,
        )

    @abstractmethod
    def setup(self) -> None:
        """
        Setup cloud resources (IAM, storage, etc.).

        Implementations should be idempotent - calling this multiple times
        should not cause errors if resources already exist.

        Raises:
            DeploymentError: If setup fails
        """

    @abstractmethod
    def deploy(self) -> str:
        """
        Deploy service and return service URL.

        Returns:
            Service URL (e.g., https://service.example.com)

        Raises:
            DeploymentError: If deployment fails
        """

    @abstractmethod
    def get_service_url(self, service_name: Optional[str] = None) -> str:
        """
        Get the deployed service URL.

        Args:
            service_name: Optional service name (defaults to config.service.name)

        Returns:
            Service URL

        Raises:
            DeploymentError: If service not found
        """

    @abstractmethod
    def update(self) -> str:
        """
        Update existing deployment.

        Returns:
            Service URL

        Raises:
            DeploymentError: If update fails
        """

    @abstractmethod
    def delete(self, service_name: Optional[str] = None) -> None:
        """
        Delete deployment.

        Args:
            service_name: Optional service name (defaults to config.service.name)

        Raises:
            DeploymentError: If deletion fails
        """

    @abstractmethod
    def get_status(self, service_name: Optional[str] = None) -> Dict[str, Any]:
        """
        Get deployment status.

        Args:
            service_name: Optional service name (defaults to config.service.name)

        Returns:
            Dictionary with status information (e.g., {"status": "running", "url": "..."})

        Raises:
            DeploymentError: If status check fails
        """

    def validate_config(self) -> None:
        """
        Validate deployment configuration.

        Runs the same ``config.validate()`` check that ``__init__`` performs.
        ``__init__`` calls ``config.validate()`` directly rather than this
        method, so an override of ``validate_config`` is not run automatically
        at construction. Call it manually to re-check the configuration
        before deployment.

        Raises:
            DeploymentConfigError: If configuration is invalid
        """
        self.config.validate()
125+

0 commit comments

Comments
 (0)