Skip to content

Commit c0ad885

Browse files
authored
Merge pull request #59 from PySport/feat/mysql-support
Test against mysql + fix for mysql
2 parents a46639c + 6da4ab5 commit c0ad885

File tree

10 files changed

+202
-93
lines changed

10 files changed

+202
-93
lines changed

.github/workflows/test.yml

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ on:
99
- main
1010

1111
jobs:
12-
build:
12+
test-sqlite:
1313
runs-on: ${{ matrix.os }}
1414
strategy:
1515
matrix:
@@ -33,3 +33,46 @@ jobs:
3333
- name: Test with pytest
3434
run: |
3535
pytest --color=yes
36+
37+
test-mysql:
38+
runs-on: ubuntu-latest
39+
strategy:
40+
matrix:
41+
python-version: [3.9, "3.10", "3.11", "3.12"]
42+
43+
services:
44+
mysql:
45+
image: mysql:8.0
46+
env:
47+
MYSQL_ROOT_PASSWORD: root
48+
MYSQL_DATABASE: ingestify_test
49+
ports:
50+
- 3306:3306
51+
options: >-
52+
--health-cmd="mysqladmin ping"
53+
--health-interval=10s
54+
--health-timeout=5s
55+
--health-retries=3
56+
57+
steps:
58+
- uses: actions/checkout@v4
59+
- name: Set up Python ${{ matrix.python-version }}
60+
uses: actions/setup-python@v5
61+
with:
62+
python-version: ${{ matrix.python-version }}
63+
- name: Install dependencies
64+
run: |
65+
python -m pip install --upgrade pip
66+
pip install -e ".[test]"
67+
- name: Install MySQL dependencies
68+
run: |
69+
pip install mysqlclient
70+
- name: Code formatting
71+
run: |
72+
pip install black==22.3.0
73+
black --check .
74+
- name: Test with pytest
75+
env:
76+
INGESTIFY_TEST_DATABASE_URL: mysql://root:root@127.0.0.1:3306/ingestify_test
77+
run: |
78+
pytest --color=yes

ingestify/infra/store/dataset/sqlalchemy/repository.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def __init__(self, url: str, table_prefix: str = ""):
130130
self._init_engine()
131131

132132
# Create all tables in the database
133-
self.metadata.create_all(self.engine)
133+
self.create_all_tables()
134134

135135
def __del__(self):
136136
self.close()
@@ -143,6 +143,14 @@ def close(self):
143143
if hasattr(self, "engine"):
144144
self.engine.dispose()
145145

146+
def create_all_tables(self):
147+
self.metadata.create_all(self.engine)
148+
149+
def drop_all_tables(self):
150+
"""Drop all tables in the database. Useful for test cleanup."""
151+
if hasattr(self, "metadata") and hasattr(self, "engine"):
152+
self.metadata.drop_all(self.engine)
153+
146154
def get(self):
147155
return self.session()
148156

@@ -208,18 +216,33 @@ def _upsert(
208216

209217
primary_key_columns = [column for column in table.columns if column.primary_key]
210218

211-
if immutable_rows:
212-
stmt = stmt.on_conflict_do_nothing(index_elements=primary_key_columns)
219+
if dialect == "mysql":
220+
# MySQL uses ON DUPLICATE KEY UPDATE syntax
221+
if immutable_rows:
222+
# For MySQL immutable rows, use INSERT IGNORE to skip duplicates
223+
stmt = stmt.prefix_with("IGNORE")
224+
else:
225+
# MySQL uses stmt.inserted instead of stmt.excluded
226+
set_ = {
227+
name: stmt.inserted[name]
228+
for name, column in table.columns.items()
229+
if column not in primary_key_columns
230+
}
231+
stmt = stmt.on_duplicate_key_update(set_)
213232
else:
214-
set_ = {
215-
name: getattr(stmt.excluded, name)
216-
for name, column in table.columns.items()
217-
if column not in primary_key_columns
218-
}
219-
220-
stmt = stmt.on_conflict_do_update(
221-
index_elements=primary_key_columns, set_=set_
222-
)
233+
# PostgreSQL and SQLite use ON CONFLICT syntax
234+
if immutable_rows:
235+
stmt = stmt.on_conflict_do_nothing(index_elements=primary_key_columns)
236+
else:
237+
set_ = {
238+
name: getattr(stmt.excluded, name)
239+
for name, column in table.columns.items()
240+
if column not in primary_key_columns
241+
}
242+
243+
stmt = stmt.on_conflict_do_update(
244+
index_elements=primary_key_columns, set_=set_
245+
)
223246

224247
connection.execute(stmt)
225248

@@ -242,7 +265,8 @@ def _build_cte_sqlite(self, records, name: str) -> CTE:
242265
def _build_cte(self, records: list[dict], name: str) -> CTE:
243266
"""Build a CTE from a list of dictionaries."""
244267

245-
if self.dialect.name == "sqlite":
268+
if self.dialect.name in ("sqlite", "mysql"):
269+
# SQLite and MySQL don't support VALUES syntax, use UNION ALL instead
246270
return self._build_cte_sqlite(records, name)
247271

248272
first_row = records[0]

ingestify/infra/store/dataset/sqlalchemy/tables.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,31 @@ class TZDateTime(TypeDecorator):
5151
LOCAL_TIMEZONE = datetime.datetime.utcnow().astimezone().tzinfo
5252
cache_ok = True
5353

54+
def __init__(self, fsp=None, **kwargs):
55+
super().__init__(**kwargs)
56+
self.fsp = fsp
57+
58+
def load_dialect_impl(self, dialect):
59+
# For MySQL, use DATETIME with fractional seconds precision
60+
if dialect.name == "mysql" and self.fsp is not None:
61+
from sqlalchemy.dialects.mysql import DATETIME as MySQL_DATETIME
62+
63+
# Return the type without type_descriptor to ensure our process methods are called
64+
return MySQL_DATETIME(fsp=self.fsp)
65+
return super().load_dialect_impl(dialect)
66+
5467
def process_bind_param(self, value: Optional[datetime.datetime], dialect):
5568
if not value:
5669
return None
5770

5871
if value.tzinfo is None:
59-
value = value.astimezone(self.LOCAL_TIMEZONE)
72+
# Assume naive datetimes are already in UTC
73+
value = value.replace(tzinfo=datetime.timezone.utc)
74+
else:
75+
# Convert timezone-aware datetimes to UTC
76+
value = value.astimezone(datetime.timezone.utc)
6077

61-
return value.astimezone(datetime.timezone.utc)
78+
return value
6279

6380
def process_result_value(self, value, dialect):
6481
if not value:

ingestify/tests/config.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
main:
2-
# Cannot use in memory data because database is shared between processes
3-
metadata_url: !ENV "sqlite:///${TEST_DIR}/main.db"
2+
# For MySQL tests, INGESTIFY_TEST_DATABASE_URL will be set
3+
# For SQLite tests, falls back to using TEST_DIR
4+
metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL}
5+
metadata_options:
6+
table_prefix: !ENV ${INGESTIFY_TEST_DATABASE_PREFIX}_
47
file_url: !ENV file://${TEST_DIR}/data
58
default_bucket: main
69

ingestify/tests/conftest.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import tempfile
22

3-
import pytest
43
import os
54

5+
import pytest
6+
7+
from ingestify.main import get_engine
8+
69

710
@pytest.fixture(scope="function", autouse=True)
811
def datastore_dir():
@@ -12,6 +15,45 @@ def datastore_dir():
1215
yield tmpdirname
1316

1417

15-
@pytest.fixture(scope="session")
16-
def config_file():
18+
@pytest.fixture(scope="function")
19+
def ingestify_test_database_url(datastore_dir, monkeypatch):
20+
key = "INGESTIFY_TEST_DATABASE_URL"
21+
22+
value = os.environ.get(key)
23+
if value is None:
24+
value = f"sqlite:///{datastore_dir}/main.db"
25+
monkeypatch.setenv(key, value)
26+
27+
yield value
28+
29+
30+
@pytest.fixture(scope="function")
31+
def config_file(ingestify_test_database_url):
32+
# Depend on ingestify_test_database_url to make sure environment variables are set in time, also make sure database is
33+
# cleaned before ingestify opens a connection
1734
return os.path.abspath(os.path.dirname(__file__) + "/config.yaml")
35+
36+
37+
@pytest.fixture
38+
def db_cleanup():
39+
def do_cleanup(engine):
40+
# # Close connections after test
41+
session_provider = getattr(
42+
engine.store.dataset_repository, "session_provider", None
43+
)
44+
if session_provider:
45+
session_provider.session.remove()
46+
session_provider.engine.dispose()
47+
session_provider.drop_all_tables()
48+
49+
return do_cleanup
50+
51+
52+
@pytest.fixture(scope="function")
53+
def engine(config_file, db_cleanup):
54+
# Now create the engine for the test
55+
engine = get_engine(config_file, "main")
56+
57+
yield engine
58+
59+
db_cleanup(engine)

ingestify/tests/test_auto_ingest.py

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from ingestify.domain.models.fetch_policy import FetchPolicy
99
from ingestify.domain import Selector, DataSpecVersionCollection
1010
from ingestify import Source, DatasetResource
11+
from ingestify.utils import utcnow
1112

1213

1314
class MockSource(Source):
@@ -39,7 +40,7 @@ def find_datasets(
3940
url="http://test.com/match1",
4041
).add_file(
4142
data_feed_key="test",
42-
last_modified=datetime.datetime.now(),
43+
last_modified=utcnow(),
4344
json_content={"blaat": "piet"},
4445
)
4546

@@ -75,7 +76,7 @@ def find_datasets(
7576
url="http://test.com/match1",
7677
).add_file(
7778
data_feed_key="test",
78-
last_modified=datetime.datetime.now(),
79+
last_modified=utcnow(),
7980
json_content={"competition_id": 11},
8081
)
8182
elif competition_id == 22:
@@ -91,7 +92,7 @@ def find_datasets(
9192
url="http://test.com/match2",
9293
).add_file(
9394
data_feed_key="test",
94-
last_modified=datetime.datetime.now(),
95+
last_modified=utcnow(),
9596
json_content={"competition_id": 22},
9697
)
9798

@@ -106,10 +107,8 @@ def discover_selectors(self, dataset_type: str):
106107
]
107108

108109

109-
def test_iter_datasets_basic_auto_ingest(config_file):
110+
def test_iter_datasets_basic_auto_ingest(engine):
110111
"""Test basic auto-ingest functionality."""
111-
engine = get_engine(config_file)
112-
113112
# Add a simple ingestion plan
114113
mock_source = MockSource(name="test_source")
115114
data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}})
@@ -141,20 +140,16 @@ def test_iter_datasets_basic_auto_ingest(config_file):
141140
assert datasets[0].identifier["competition_id"] == 11
142141

143142

144-
def test_iter_datasets_auto_ingest_disabled(config_file):
143+
def test_iter_datasets_auto_ingest_disabled(engine):
145144
"""Test that auto_ingest=False returns only existing datasets."""
146-
engine = get_engine(config_file)
147-
148145
# Should only return existing datasets (none in empty store)
149146
datasets = list(engine.iter_datasets(competition_id=11, auto_ingest=False))
150147

151148
assert len(datasets) == 0
152149

153150

154-
def test_iter_datasets_outside_config_scope(config_file):
151+
def test_iter_datasets_outside_config_scope(engine):
155152
"""Test that requests outside IngestionPlan scope return nothing."""
156-
engine = get_engine(config_file)
157-
158153
# Add plan only for competition_id=11
159154
mock_source = MockSource(name="test_source")
160155
data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}})
@@ -180,10 +175,8 @@ def test_iter_datasets_outside_config_scope(config_file):
180175
assert len(datasets) == 0
181176

182177

183-
def test_iter_datasets_discover_selectors_with_filters(config_file):
178+
def test_iter_datasets_discover_selectors_with_filters(engine):
184179
"""Test that selector_filters are applied after discover_selectors runs."""
185-
engine = get_engine(config_file)
186-
187180
# Create an IngestionPlan with empty selector - this will trigger discover_selectors
188181
mock_source = MockSourceWithDiscoverSelectors(name="test_source_discover")
189182
data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}})
@@ -216,10 +209,8 @@ def test_iter_datasets_discover_selectors_with_filters(config_file):
216209
assert datasets[0].name == "Mock match comp 11"
217210

218211

219-
def test_iter_datasets_discover_selectors_multiple_matches(config_file):
212+
def test_iter_datasets_discover_selectors_multiple_matches(engine):
220213
"""Test that multiple discovered selectors can match the filters."""
221-
engine = get_engine(config_file)
222-
223214
# Create an IngestionPlan with empty selector - this will trigger discover_selectors
224215
mock_source = MockSourceWithDiscoverSelectors(name="test_source_discover")
225216
data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}})
@@ -248,12 +239,10 @@ def test_iter_datasets_discover_selectors_multiple_matches(config_file):
248239
assert competition_ids == {11, 22}
249240

250241

251-
def test_selector_filters_make_discovered_selectors_more_strict(config_file):
242+
def test_selector_filters_make_discovered_selectors_more_strict(engine):
252243
"""Test that when selector_filters are more strict than discovered selectors, we make the selectors more strict."""
253244
from unittest.mock import Mock
254245

255-
engine = get_engine(config_file)
256-
257246
# Create a source that returns multiple matches per season
258247
class MockSourceMultipleMatches(Source):
259248
@property
@@ -291,7 +280,7 @@ def find_datasets(
291280
url=f"http://test.com/match{mid}",
292281
).add_file(
293282
data_feed_key="test",
294-
last_modified=datetime.datetime.now(),
283+
last_modified=utcnow(),
295284
json_content={"match_id": mid},
296285
)
297286
return []
@@ -348,13 +337,11 @@ def discover_selectors(self, dataset_type):
348337
# Without this optimization, we'd call with match_id=None and fetch 3 matches instead of 1
349338

350339

351-
def test_iter_datasets_with_open_data_auto_discovery(config_file):
340+
def test_iter_datasets_with_open_data_auto_discovery(engine):
352341
"""Test that use_open_data=True auto-discovers open data sources without configuration."""
353342
from unittest.mock import Mock
354343
from ingestify.application import loader
355344

356-
engine = get_engine(config_file)
357-
358345
# Create mock source class that inherits from Source
359346
class MockOpenDataSource(Source):
360347
def __init__(self, name):
@@ -387,7 +374,7 @@ def find_datasets(
387374
url="http://open-data.com/match123",
388375
).add_file(
389376
data_feed_key="test",
390-
last_modified=datetime.datetime.now(),
377+
last_modified=utcnow(),
391378
json_content={"match_id": 123},
392379
)
393380

0 commit comments

Comments
 (0)