Skip to content

Commit 813927a

Browse files
committed
Make tests pass with openml-services (#217)
For now, the relevant definition files are still maintained in this repository so that they can change independently for a little while, as the server is under its most active development. Afterwards, we can consider which changes should be merged into openml-services to reduce duplication again.
1 parent 7c0d5c3 commit 813927a

File tree

19 files changed

+298
-222
lines changed

19 files changed

+298
-222
lines changed

.github/workflows/tests.yml

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,37 +15,29 @@ jobs:
1515
compare-php:
1616
runs-on: ubuntu-latest
1717
steps:
18-
- uses: actions/checkout@v4
19-
- uses: actions/setup-python@v4
18+
- uses: actions/checkout@v6
19+
- uses: actions/setup-python@v6
2020
with:
2121
python-version: 3.x
2222

23-
# A naive `docker compose up` would first build the `python-api` container and then
24-
# start all services, which kickstarts Elastic Search and building indices.
25-
# But since those two steps are independent, we can parallelize them to save time.
26-
- run: |
27-
docker compose build python-api
28-
docker compose up -d --wait python-api php-api
29-
- run: docker container ls && docker image ls
30-
- run: docker exec python-api python -m pip freeze
31-
- run: docker exec python-api coverage run -m pytest -xv -m "php_api"
32-
- run: docker exec python-api coverage xml
23+
# https://github.com/docker/compose/issues/10596
24+
- run: docker compose --profile "python" --profile "php" up --detach --wait --remove-orphans || exit $(docker compose ps -q | xargs docker inspect -f '{{.State.ExitCode}}' | grep -v '^0' | wc -l)
25+
- run: docker exec openml-python-rest-api coverage run -m pytest -v -m "php_api"
26+
- run: docker exec openml-python-rest-api coverage xml
3327
- name: Upload results to Codecov
3428
uses: codecov/codecov-action@v4
3529
with:
3630
token: ${{ secrets.CODECOV_TOKEN }}
3731
python:
3832
runs-on: ubuntu-latest
3933
steps:
40-
- uses: actions/checkout@v4
41-
- uses: actions/setup-python@v4
34+
- uses: actions/checkout@v6
35+
- uses: actions/setup-python@v6
4236
with:
4337
python-version: 3.x
44-
- run: docker compose up -d --wait database python-api
45-
- run: docker container ls && docker image ls
46-
- run: docker exec python-api python -m pip freeze
47-
- run: docker exec python-api coverage run -m pytest -xv -m "not php_api"
48-
- run: docker exec python-api coverage xml
38+
- run: docker compose --profile "python" up --detach --wait --remove-orphans || exit $(docker compose ps -q | xargs docker inspect -f '{{.State.ExitCode}}' | grep -v '^0' | wc -l)
39+
- run: docker exec openml-python-rest-api coverage run -m pytest -v -m "not php_api"
40+
- run: docker exec openml-python-rest-api coverage xml
4941
- name: Upload results to Codecov
5042
uses: codecov/codecov-action@v4
5143
with:

docker-compose.yaml

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,33 @@
11
services:
22
database:
3-
image: "openml/test-database"
3+
profiles: ["python", "php", "all"]
4+
image: "openml/test-database:20240105"
45
container_name: "openml-test-database"
56
environment:
67
MYSQL_ROOT_PASSWORD: ok
78
ports:
89
- "3306:3306"
10+
healthcheck:
11+
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
12+
start_period: 30s
13+
start_interval: 1s
14+
timeout: 3s
15+
interval: 5s
16+
retries: 10
17+
18+
database-setup:
19+
profiles: ["python", "php", "all"]
20+
image: mysql
21+
container_name: "openml-test-database-setup"
22+
volumes:
23+
- ./docker/database/update.sh:/database-update.sh
24+
command: /bin/sh -c "/database-update.sh"
25+
depends_on:
26+
database:
27+
condition: service_healthy
928

1029
docs:
30+
profiles: ["all"]
1131
build:
1232
context: .
1333
dockerfile: docker/docs/Dockerfile
@@ -16,8 +36,35 @@ services:
1636
volumes:
1737
- .:/docs
1838

39+
elasticsearch:
40+
profiles: ["php", "all"]
41+
image: docker.elastic.co/elasticsearch/elasticsearch:6.8.23
42+
container_name: "openml-elasticsearch"
43+
platform: "linux/amd64"
44+
ports:
45+
- "9200:9200" # also known as /es (nginx)
46+
- "9300:9300"
47+
env_file: docker/elasticsearch/.env
48+
healthcheck:
49+
test: curl 127.0.0.1:9200/_cluster/health | grep -e "green"
50+
start_period: 30s
51+
start_interval: 5s
52+
timeout: 3s
53+
interval: 10s
54+
deploy:
55+
resources:
56+
limits:
57+
cpus: '1'
58+
memory: 1G
59+
reservations:
60+
cpus: '0.2'
61+
memory: 250M
62+
1963
php-api:
20-
image: "openml/php-rest-api"
64+
profiles: ["php", "all"]
65+
image: "openml/php-rest-api:v1.2.2"
66+
container_name: "openml-php-rest-api"
67+
env_file: docker/php/.env
2168
ports:
2269
- "8002:80"
2370
depends_on:
@@ -33,7 +80,8 @@ services:
3380
interval: 1m
3481

3582
python-api:
36-
container_name: "python-api"
83+
profiles: ["python", "all"]
84+
container_name: "openml-python-rest-api"
3785
build:
3886
context: .
3987
dockerfile: docker/python/Dockerfile
@@ -43,20 +91,3 @@ services:
4391
- .:/python-api
4492
depends_on:
4593
- database
46-
47-
elasticsearch:
48-
image: docker.elastic.co/elasticsearch/elasticsearch:6.8.23
49-
container_name: "elasticsearch"
50-
ports:
51-
- "9200:9200"
52-
- "9300:9300"
53-
environment:
54-
- ELASTIC_PASSWORD=default
55-
- discovery.type=single-node
56-
- xpack.security.enabled=false
57-
healthcheck:
58-
test: curl 127.0.0.1:9200/_cluster/health | grep -e "green"
59-
start_period: 30s
60-
start_interval: 5s
61-
timeout: 3s
62-
interval: 1m

docker/database/update.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
2+
# Change the filepath of openml.file
3+
# from "https://www.openml.org/data/download/1666876/phpFsFYVN"
4+
# to "http://minio:9000/datasets/0000/0001/phpFsFYVN"
5+
mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";'
6+
7+
# Update openml_expdb.dataset with the same url
8+
mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;'
9+
10+
11+
12+
13+
14+
# Create the data_feature_description TABLE. TODO: can we make sure this table exists already?
15+
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` (
16+
`did` int unsigned NOT NULL,
17+
`index` int unsigned NOT NULL,
18+
`uploader` mediumint unsigned NOT NULL,
19+
`date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
20+
`description_type` enum("plain", "ontology") NOT NULL,
21+
`value` varchar(256) NOT NULL,
22+
KEY `did` (`did`,`index`),
23+
CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE
24+
)'
25+
26+
# Set dataset 1 to active (used in the Java unit tests)
27+
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)'
28+
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";'
29+
30+
# Temporary fix in case the database is missing the kaggle table. The PHP REST API expects the table to exist while indexing.
31+
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)'

docker/elasticsearch/.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ELASTIC_PASSWORD=default
2+
discovery.type=single-node
3+
xpack.security.enabled=false

docker/php/.env

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
API_KEY=AD000000000000000000000000000000
2+
BASE_URL=http://php-api:80/
3+
MINIO_URL=http://minio:9000/
4+
DB_HOST_OPENML=database:3306
5+
DB_HOST_EXPDB=database:3306
6+
DB_USER_OPENML=root
7+
DB_PASS_OPENML=ok
8+
DB_USER_EXPDB_WRITE=root
9+
DB_PASS_EXPDB_WRITE=ok
10+
DB_USER_EXPDB_READ=root
11+
DB_PASS_EXPDB_READ=ok
12+
ES_URL=elasticsearch:9200
13+
ES_PASSWORD=default
14+
INDEX_ES_DURING_STARTUP=false

src/config.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
TomlTable = dict[str, typing.Any]
1010

11+
CONFIG_PATH = Path(__file__).parent / "config.toml"
12+
1113

1214
def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
1315
defaults = configuration["defaults"]
@@ -19,9 +21,17 @@ def _apply_defaults_to_siblings(configuration: TomlTable) -> TomlTable:
1921

2022

2123
@functools.cache
22-
def load_database_configuration(file: Path = Path(__file__).parent / "config.toml") -> TomlTable:
23-
configuration = tomllib.loads(file.read_text())
24+
def _load_configuration(file: Path) -> TomlTable:
25+
return typing.cast(TomlTable, tomllib.loads(file.read_text()))
26+
27+
28+
def load_routing_configuration(file: Path = CONFIG_PATH) -> TomlTable:
29+
return typing.cast(TomlTable, _load_configuration(file)["routing"])
2430

31+
32+
@functools.cache
33+
def load_database_configuration(file: Path = CONFIG_PATH) -> TomlTable:
34+
configuration = _load_configuration(file)
2535
database_configuration = _apply_defaults_to_siblings(
2636
configuration["databases"],
2737
)

src/config.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ database="openml_expdb"
1515

1616
[databases.openml]
1717
database="openml"
18+
19+
[routing]
20+
minio_url="http://minio:9000/"
21+
server_url="http://php-api:80/"

src/core/formatting.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from sqlalchemy.engine import Row
44

5-
from config import load_configuration
5+
from config import load_routing_configuration
66
from core.errors import DatasetError
77
from schemas.datasets.openml import DatasetFileFormat
88

@@ -25,15 +25,16 @@ def _format_parquet_url(dataset: Row) -> str | None:
2525
if dataset.format.lower() != DatasetFileFormat.ARFF:
2626
return None
2727

28-
minio_base_url = load_configuration()["minio_base_url"]
29-
prefix = dataset.did // 10_000
30-
return f"{minio_base_url}/datasets/{prefix:04d}/{dataset.did:04d}/dataset_{dataset.did}.pq"
28+
minio_base_url = load_routing_configuration()["minio_url"]
29+
ten_thousands_prefix = f"{dataset.did // 10_000:04d}"
30+
padded_id = f"{dataset.did:04d}"
31+
return f"{minio_base_url}datasets/{ten_thousands_prefix}/{padded_id}/dataset_{dataset.did}.pq"
3132

3233

3334
def _format_dataset_url(dataset: Row) -> str:
34-
base_url = load_configuration()["arff_base_url"]
35+
base_url = load_routing_configuration()["server_url"]
3536
filename = f"{html.escape(dataset.name)}.{dataset.format.lower()}"
36-
return f"{base_url}/data/v1/download/{dataset.file_id}/{filename}"
37+
return f"{base_url}data/v1/download/{dataset.file_id}/{filename}"
3738

3839

3940
def _safe_unquote(text: str | None) -> str | None:

src/routers/openml/flows.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,8 @@ def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection
4949
]
5050

5151
tags = database.flows.get_tags(flow_id, expdb)
52-
flow_rows = database.flows.get_subflows(for_flow=flow_id, expdb=expdb)
53-
subflows = [
54-
{
55-
"identifier": flow.identifier,
56-
"flow": get_flow(flow_id=flow.child_id, expdb=expdb),
57-
}
58-
for flow in flow_rows
59-
]
52+
flow_rows = database.flows.get_subflows(flow_id, expdb)
53+
subflows = [get_flow(flow_id=flow.child_id, expdb=expdb) for flow in flow_rows]
6054

6155
return Flow(
6256
id_=flow.id,

src/routers/openml/tasks.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from fastapi import APIRouter, Depends, HTTPException
88
from sqlalchemy import Connection, RowMapping, text
99

10+
import config
1011
import database.datasets
1112
import database.tasks
1213
from routers.dependencies import expdb_connection
@@ -139,7 +140,8 @@ def _fill_json_template(
139140
# I believe that the operations below are always part of string output, so
140141
# we don't need to be careful to avoid losing typedness
141142
template = template.replace("[TASK:id]", str(task.task_id))
142-
return template.replace("[CONSTANT:base_url]", "https://test.openml.org/")
143+
server_url = config.load_routing_configuration()["server_url"]
144+
return template.replace("[CONSTANT:base_url]", server_url)
143145

144146

145147
@router.get("/{task_id}")

0 commit comments

Comments
 (0)