
Commit 00aef58

Merge pull request #25 from RobGeada/DB
feat: Add MariaDB storage_interface
2 parents: 0a57e99 + d7232c4

20 files changed: +56,851 −71 lines

.github/workflows/python-tests.yaml

Lines changed: 17 additions & 6 deletions

@@ -22,34 +22,45 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
           cache: "pip"
-
+      - name: Install MariaDB and MariaDB Connector
+        run: |
+          sudo apt-get install
+          sudo apt-get install -y libmariadb3 libmariadb-dev mariadb-server
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           python -m venv .venv
           source .venv/bin/activate
           if [ -f pyproject.toml ]; then
-            uv pip install -e ".[dev,protobuf]"
+            uv pip install -e ".[dev,protobuf,mariadb]"
           else
             uv pip install pytest
           fi

       - name: Install protobuf compiler
         run: |
-          sudo apt-get update
           sudo apt-get install -y protobuf-compiler
-
       - name: Generate protobuf stubs
         run: |
           source .venv/bin/activate
           bash scripts/generate_protos.sh
-
+      - name: Shutdown Ubuntu MySQL
+        run: sudo service mysql stop
+      - name: Set up MariaDB
+        uses: getong/[email protected]
+        with:
+          mysql user: 'trustyai'
+          mysql password: 'trustyai'
+          mysql root password: ''
+          mysql database: 'trustyai-database'
+      - name: Populate Database
+        run: |
+          mariadb -u trustyai --password=trustyai -D trustyai-database -h 127.0.0.1 < tests/resources/legacy_database_dump.sql
       - name: Run tests with pytest
         run: |
           source .venv/bin/activate
           pytest tests/ -v --cov=src --cov-report=xml
-
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         with:
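
For reference only (not part of this commit), the CI database configured above can be sanity-checked with the `mariadb` Python connector that the new `mariadb` extra installs. The credentials and database name come from the workflow steps above; the port is an assumption (the action's usual default, 3306).

```python
# Illustrative sketch: confirm the CI MariaDB service is reachable and that the
# legacy dump created tables, using the credentials from the workflow above.
# Assumes the `mariadb` connector from the new extra and the default port 3306.
import mariadb

conn = mariadb.connect(
    user="trustyai",
    password="trustyai",
    host="127.0.0.1",
    port=3306,
    database="trustyai-database",
)
try:
    cur = conn.cursor()
    cur.execute("SHOW TABLES")
    # tables should have been created by tests/resources/legacy_database_dump.sql
    print([row[0] for row in cur.fetchall()])
finally:
    conn.close()
```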

Dockerfile

Lines changed: 10 additions & 2 deletions

@@ -10,10 +10,18 @@ WORKDIR /app
 COPY pyproject.toml poetry.lock* README.md ./


-
 USER root
+
+# install mariadb connector from MariaDB Community Service package repository
+RUN if [[ "$EXTRAS" == *"mariadb"* ]]; then \
+    curl -LsSO https://r.mariadb.com/downloads/mariadb_repo_setup && \
+    echo "c4a0f3dade02c51a6a28ca3609a13d7a0f8910cccbb90935a2f218454d3a914a  mariadb_repo_setup" | sha256sum -c - && \
+    chmod +x mariadb_repo_setup && \
+    ./mariadb_repo_setup --mariadb-server-version="mariadb-10.6" && \
+    dnf install -y MariaDB-shared MariaDB-devel; \
+    fi
 RUN pip install uv==0.6.16 && \
-    uv pip install .[$EXTRAS]
+    uv pip install ".[$EXTRAS]"
 COPY . .
 USER 1001
 EXPOSE 4443
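
Because the connector packages only exist in images built with the `mariadb` extra, application code would typically guard the import. The helper below is a hypothetical sketch of that pattern, not code from this repository.

```python
# Hypothetical helper: fail with a clear message when the optional `mariadb`
# extra (and the MariaDB Connector/C it links against) is missing from an image.
def require_mariadb():
    try:
        import mariadb  # installed only when EXTRAS includes "mariadb"
    except ImportError as err:
        raise RuntimeError(
            "MariaDB storage requested but the 'mariadb' extra is not installed; "
            'rebuild with --build-arg EXTRAS="mariadb" or run uv pip install ".[mariadb]" locally.'
        ) from err
    return mariadb
```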

README.md

Lines changed: 15 additions & 25 deletions

@@ -32,31 +32,27 @@ environment, a Jupyter Notebook, or in Kubernetes.

 ---
 ## 📦 Building 📦
-### Locally (Without Eval Support)
-```bash
-uv pip install .
-````
-
-### Locally (With Eval Support)
+### Locally
 ```bash
-uv pip install .[eval]
-````
+uv pip install ".[$EXTRAS]"
+```

-### Container (Without Eval Support)
+### Container
 ```bash
-podman build -t $IMAGE_NAME .
-````
+podman build -t $IMAGE_NAME --build-arg EXTRAS="$EXTRAS" .
+```

-### Container (With Eval Support)
-```bash
-podman build -t $IMAGE_NAME --build-arg EXTRAS=eval .
-````
+### Available Extras
+Pass these extras as a comma separated list, e.g., `"mariadb,protobuf"`
+* `protobuf`: To process model inference data from ModelMesh models, you can install with `protobuf` support. Otherwise, only KServe models will be supported.
+* `eval`: To enable the Language Model Evaluation servers, install with `eval` support.
+* `mariadb` (If installing locally, install the [MariaDB Connector/C](https://mariadb.com/docs/server/connect/programming-languages/c/install/) first.)

-### Locally (With ModelMesh/Protobuf Support)
+### Examples
 ```bash
-uv pip install .[protobuf]
-````
-
+uv pip install ".[mariadb,protobuf,eval]"
+podman build -t $IMAGE_NAME --build-arg EXTRAS="mariadb,protobuf,eval" .
+```

 ## 🏃Running 🏃‍♀️
 ### Locally

@@ -91,12 +87,6 @@ python -m pytest --cov=src
 ## 🔄 Protobuf Support 🔄
 To process model inference data from ModelMesh models, you can install protobuf support. Otherwise, only KServe models will be supported.

-### Installing Dependencies
-Install the required dependencies for protobuf support:
-```bash
-uv pip install -e ".[protobuf]"
-```
-
 ### Generating Protobuf Code
 After installing dependencies, generate Python code from the protobuf definitions:


pyproject.toml

Lines changed: 2 additions & 0 deletions

@@ -28,6 +28,8 @@ dev = [
 ]
 eval = ["lm-eval[api]==0.4.4", "fastapi-utils>=0.8.0", "typing-inspect==0.9.0"]
 protobuf = ["numpy>=1.24.0,<3", "grpcio>=1.62.1,<2", "grpcio-tools>=1.62.1,<2"]
+mariadb = ["mariadb>=1.1.12", "javaobj-py3==0.4.4"]
+

 [tool.hatch.build.targets.sdist]
 include = ["src"]
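
The `javaobj-py3` pin presumably supports the legacy reader (`LegacyMariaDBStorageReader`, imported later in this diff) by decoding Java-serialized values written by the original Java service. A minimal sketch of that kind of use, under that assumption only:

```python
# Assumption-labelled sketch: javaobj-py3 can deserialize Java object streams,
# which is presumably how legacy rows from the Java TrustyAI service are read.
# Not code from this commit.
import javaobj.v2 as javaobj

def decode_java_blob(blob: bytes):
    """Turn a Java-serialized byte string into Python objects."""
    return javaobj.loads(blob)
```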

src/endpoints/consumer/consumer_endpoint.py

Lines changed: 17 additions & 17 deletions

@@ -27,7 +27,7 @@
 logger = logging.getLogger(__name__)

 PartialKind = Literal["request", "response"]
-storage_inferface = get_storage_interface()
+storage_interface = get_storage_interface()
 unreconciled_inputs = {}
 unreconciled_outputs = {}

@@ -160,11 +160,11 @@ async def consume_inference_payload(
         ) from e

         # Store the input payload
-        await storage_inferface.persist_modelmesh_payload(
+        await storage_interface.persist_modelmesh_payload(
             partial_payload, payload_id, is_input
         )

-        output_payload = await storage_inferface.get_modelmesh_payload(
+        output_payload = await storage_interface.get_modelmesh_payload(
             payload_id, False
         )

@@ -188,11 +188,11 @@ async def consume_inference_payload(
         ) from e

         # Store the output payload
-        await storage_inferface.persist_modelmesh_payload(
+        await storage_interface.persist_modelmesh_payload(
             partial_payload, payload_id, is_input
         )

-        input_payload = await storage_inferface.get_modelmesh_payload(
+        input_payload = await storage_interface.get_modelmesh_payload(
             payload_id, True
         )

@@ -253,11 +253,11 @@ async def reconcile_modelmesh_payloads(
     metadata_cols = ["iso_time", "unix_timestamp", "tags"]

     await asyncio.gather(
-        storage_inferface.write_data(input_dataset, df[input_cols].values, input_cols),
-        storage_inferface.write_data(
+        storage_interface.write_data(input_dataset, df[input_cols].values, input_cols),
+        storage_interface.write_data(
             output_dataset, df[output_cols].values, output_cols
         ),
-        storage_inferface.write_data(metadata_dataset, metadata, metadata_cols),
+        storage_interface.write_data(metadata_dataset, metadata, metadata_cols),
     )

     shapes = await ModelData(model_id).shapes()
@@ -273,8 +273,8 @@
     )

     # Clean up
-    await storage_inferface.delete_modelmesh_payload(request_id, True)
-    await storage_inferface.delete_modelmesh_payload(request_id, False)
+    await storage_interface.delete_modelmesh_payload(request_id, True)
+    await storage_interface.delete_modelmesh_payload(request_id, False)


 def reconcile_mismatching_shape_error(shape_tuples, payload_type, payload_id):
@@ -377,9 +377,9 @@ async def reconcile(
     metadata_dataset = output_payload.model_name + METADATA_SUFFIX

     await asyncio.gather(
-        storage_inferface.write_data(input_dataset, input_array, input_names),
-        storage_inferface.write_data(output_dataset, output_array, output_names),
-        storage_inferface.write_data(metadata_dataset, metadata, metadata_names),
+        storage_interface.write_data(input_dataset, input_array, input_names),
+        storage_interface.write_data(output_dataset, output_array, output_names),
+        storage_interface.write_data(metadata_dataset, metadata, metadata_names),
     )

     shapes = await ModelData(output_payload.model_name).shapes()
@@ -411,13 +411,13 @@ async def consume_cloud_event(
     else:
         logger.info(f"KServe Inference Input {payload.id} received.")
         # if a match is found, the payload is auto-deleted from data
-        partial_output = await storage_inferface.get_partial_payload(
+        partial_output = await storage_interface.get_partial_payload(
             payload.id, is_input=False
         )
         if partial_output is not None:
             await reconcile(payload, partial_output)
         else:
-            await storage_inferface.persist_partial_payload(payload, is_input=True)
+            await storage_interface.persist_partial_payload(payload, is_input=True)
         return {
             "status": "success",
             "message": f"Input payload {payload.id} processed successfully",
@@ -435,13 +435,13 @@
         logger.info(
             f"KServe Inference Output {payload.id} received from model={payload.model_name}."
         )
-        partial_input = await storage_inferface.get_partial_payload(
+        partial_input = await storage_interface.get_partial_payload(
             payload.id, is_input=True
         )
         if partial_input is not None:
             await reconcile(partial_input, payload)
         else:
-            await storage_inferface.persist_partial_payload(payload, is_input=False)
+            await storage_interface.persist_partial_payload(payload, is_input=False)

         return {
             "status": "success",
Lines changed: 12 additions & 0 deletions

@@ -1,9 +1,21 @@
 import os
+
+from src.service.data.storage.maria.legacy_maria_reader import LegacyMariaDBStorageReader
+from src.service.data.storage.maria.maria import MariaDBStorage
 from src.service.data.storage.pvc import PVCStorage

 def get_storage_interface():
     storage_format = os.environ.get("SERVICE_STORAGE_FORMAT", "PVC")
     if storage_format == "PVC":
         return PVCStorage(data_directory=os.environ.get("STORAGE_DATA_FOLDER", "/tmp"), data_file=os.environ.get("STORAGE_DATA_FILENAME", "trustyai.hdf5"))
+    elif storage_format == "MARIA":
+        return MariaDBStorage(
+            user=os.environ.get("DATABASE_USERNAME"),
+            password=os.environ.get("DATABASE_PASSWORD"),
+            host=os.environ.get("DATABASE_HOST"),
+            port=int(os.environ.get("DATABASE_PORT")),
+            database=os.environ.get("DATABASE_DATABASE"),
+            attempt_migration=True
+        )
     else:
         raise ValueError(f"Storage format={storage_format} not yet supported by the Python implementation of the service.")
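
With this factory in place, selecting the MariaDB backend is purely a matter of environment variables. A minimal sketch using the variable names the factory reads (the values are illustrative placeholders, not defaults from the commit):

```python
# Minimal sketch: point the service at MariaDB via the environment variables
# read by get_storage_interface() above. Values are illustrative placeholders.
import os

os.environ.update({
    "SERVICE_STORAGE_FORMAT": "MARIA",
    "DATABASE_USERNAME": "trustyai",
    "DATABASE_PASSWORD": "trustyai",
    "DATABASE_HOST": "127.0.0.1",
    "DATABASE_PORT": "3306",
    "DATABASE_DATABASE": "trustyai-database",
})

# get_storage_interface is the factory shown in the diff above; its module
# path is not shown on this page, so the import is omitted here.
storage = get_storage_interface()  # returns MariaDBStorage(..., attempt_migration=True)
```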

src/service/data/storage/maria/__init__.py

Whitespace-only changes.
