From 8a07c58961a3d40369bc420a98fd20af0df5471b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:13:41 -0500 Subject: [PATCH 01/49] start using datetimes with fractional seconds with mysql --- .../mlos_bench/storage/sql/experiment.py | 6 +--- mlos_bench/mlos_bench/storage/sql/schema.py | 31 +++++++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index acc2a497b4..7a6dbf75b4 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -367,11 +367,7 @@ def _new_trial( ts_start: datetime | None = None, config: dict[str, Any] | None = None, ) -> Storage.Trial: - # MySQL can round microseconds into the future causing scheduler to skip trials. - # Truncate microseconds to avoid this issue. - ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local").replace( - microsecond=0 - ) + ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local") _LOG.debug("Create trial: %s:%d @ %s", self._experiment_id, self._trial_id, ts_start) with self._engine.begin() as conn: try: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bc00f0082..3cbb63bcc4 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -104,8 +105,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._STATUS_LEN)), @@ -179,8 +180,16 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime), + Column( + "ts_start", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + ), + Column( + "ts_end", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -232,7 +241,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("status", String(self._STATUS_LEN), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -267,7 +281,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From d6fc9e279bdf0e5c932640ef58617d08a6b6bb00 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:18:40 -0500 Subject: [PATCH 02/49] adjust tests to also check for fractional time --- .../tests/environments/local/composite_local_env_test.py | 5 ++--- .../tests/environments/local/local_env_telemetry_test.py | 8 ++------ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py b/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py index 033fa16330..64f1df273c 100644 --- a/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py +++ b/mlos_bench/mlos_bench/tests/environments/local/composite_local_env_test.py @@ -17,8 +17,8 @@ def _format_str(zone_info: tzinfo | None) -> str: if zone_info is not None: - return "%Y-%m-%d %H:%M:%S %z" - return "%Y-%m-%d %H:%M:%S" + return "%Y-%m-%d %H:%M:%S.%f %z" + return "%Y-%m-%d %H:%M:%S.%f" # FIXME: This fails with zone_info = None when run with `TZ="America/Chicago pytest -n0 ...` @@ -34,7 +34,6 @@ def test_composite_env(tunable_groups: TunableGroups, zone_info: tzinfo | None) See Also: http://github.com/microsoft/MLOS/issues/501 """ ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=2) format_str = _format_str(zone_info) diff --git a/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py b/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py index a654ed1f34..2bc789485a 100644 --- a/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py +++ b/mlos_bench/mlos_bench/tests/environments/local/local_env_telemetry_test.py @@ -16,8 +16,8 @@ def _format_str(zone_info: tzinfo | None) -> str: if zone_info is not None: - return "%Y-%m-%d %H:%M:%S %z" - return "%Y-%m-%d %H:%M:%S" + return "%Y-%m-%d %H:%M:%S.%f %z" + return "%Y-%m-%d %H:%M:%S.%f" # FIXME: This fails with zone_info = None when run with `TZ="America/Chicago pytest -n0 ...` @@ -25,7 +25,6 @@ def _format_str(zone_info: tzinfo | None) -> str: def test_local_env_telemetry(tunable_groups: TunableGroups, zone_info: tzinfo | None) -> None: """Produce benchmark and telemetry data in a local script and read it.""" ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) @@ -77,7 +76,6 @@ def test_local_env_telemetry_no_header( ) -> None: """Read the telemetry data with no header.""" ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) @@ -121,7 +119,6 @@ def test_local_env_telemetry_wrong_header( ) -> None: """Read the telemetry data with incorrect header.""" ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) @@ -150,7 +147,6 @@ def test_local_env_telemetry_invalid(tunable_groups: TunableGroups) -> None: """Fail when the telemetry data has wrong format.""" zone_info = UTC ts1 = datetime.now(zone_info) - ts1 -= timedelta(microseconds=ts1.microsecond) # Round to a second ts2 = ts1 + timedelta(minutes=1) format_str = _format_str(zone_info) From 65e7f08578ede928d5bb5ca16b499f7276f05e57 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:19:17 -0500 Subject: [PATCH 03/49] Revert "start using datetimes with fractional seconds with mysql" This reverts commit 8a07c58961a3d40369bc420a98fd20af0df5471b. --- .../mlos_bench/storage/sql/experiment.py | 6 +++- mlos_bench/mlos_bench/storage/sql/schema.py | 31 ++++--------------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 7a6dbf75b4..acc2a497b4 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -367,7 +367,11 @@ def _new_trial( ts_start: datetime | None = None, config: dict[str, Any] | None = None, ) -> Storage.Trial: - ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local") + # MySQL can round microseconds into the future causing scheduler to skip trials. + # Truncate microseconds to avoid this issue. + ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local").replace( + microsecond=0 + ) _LOG.debug("Create trial: %s:%d @ %s", self._experiment_id, self._trial_id, ts_start) with self._engine.begin() as conn: try: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 3cbb63bcc4..2bc00f0082 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -105,8 +104,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), - Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_start", DateTime), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._STATUS_LEN)), @@ -180,16 +179,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - ), - Column( - "ts_end", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=True, - ), + Column("ts_start", DateTime, nullable=False), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -241,12 +232,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._STATUS_LEN), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -281,12 +267,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From b7dad429bfba4e7ec55ab7e05a1a7b2ef39c11be Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 09:19:53 -0500 Subject: [PATCH 04/49] Reapply "start using datetimes with fractional seconds with mysql" This reverts commit 65e7f08578ede928d5bb5ca16b499f7276f05e57. --- .../mlos_bench/storage/sql/experiment.py | 6 +--- mlos_bench/mlos_bench/storage/sql/schema.py | 31 +++++++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index acc2a497b4..7a6dbf75b4 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -367,11 +367,7 @@ def _new_trial( ts_start: datetime | None = None, config: dict[str, Any] | None = None, ) -> Storage.Trial: - # MySQL can round microseconds into the future causing scheduler to skip trials. - # Truncate microseconds to avoid this issue. - ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local").replace( - microsecond=0 - ) + ts_start = utcify_timestamp(ts_start or datetime.now(UTC), origin="local") _LOG.debug("Create trial: %s:%d @ %s", self._experiment_id, self._trial_id, ts_start) with self._engine.begin() as conn: try: diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bc00f0082..3cbb63bcc4 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -104,8 +105,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._STATUS_LEN)), @@ -179,8 +180,16 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime), + Column( + "ts_start", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + ), + Column( + "ts_end", + DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._STATUS_LEN), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -232,7 +241,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("status", String(self._STATUS_LEN), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -267,7 +281,12 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._ID_LEN), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + nullable=False, + default="now", + ), Column("metric_id", String(self._ID_LEN), nullable=False), Column("metric_value", String(self._METRIC_VALUE_LEN)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 4bf0c5f69966ade2a37cf6665b2385f58853e3e0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:00 -0500 Subject: [PATCH 05/49] apply black on alembic commit --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 4d2a1120c5..f5a249fd7a 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -72,10 +72,10 @@ sqlalchemy.url = sqlite:///mlos_bench.sqlite # detail and examples # format using "black" - use the console_scripts runner, against the "black" entrypoint -# hooks = black -# black.type = console_scripts -# black.entrypoint = black -# black.options = -l 79 REVISION_SCRIPT_FILENAME +hooks = black +black.type = console_scripts +black.entrypoint = black +black.options = REVISION_SCRIPT_FILENAME # lint with attempts to fix using "ruff" - use the exec runner, execute a binary # hooks = ruff From 8088734e20baa181c4eed46fe475a2a4649e6df5 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 11:18:46 -0500 Subject: [PATCH 06/49] preparing to support testing multiple backend engines for schema changes --- .../mlos_bench/config/storage/mysql.jsonc | 2 +- .../config/storage/postgresql.jsonc | 2 +- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 + .../mlos_bench/storage/sql/alembic/README.md | 115 +++++++++++++++--- .../tests/storage/test_storage_schemas.py | 1 + 5 files changed, 106 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/config/storage/mysql.jsonc b/mlos_bench/mlos_bench/config/storage/mysql.jsonc index c133b379ba..15986de4c2 100644 --- a/mlos_bench/mlos_bench/config/storage/mysql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/mysql.jsonc @@ -6,7 +6,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "mysql+mysqlconnector", - "database": "osat", + "database": "mlos_bench", "username": "root", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc index cd1214835c..1cff2d76b7 100644 --- a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc @@ -8,7 +8,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "postgresql+psycopg2", - "database": "osat", + "database": "mlos_bench", "username": "postgres", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index f5a249fd7a..6cc1e4b7c8 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,7 +63,10 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. +# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite +#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ec35eb70f6..9a9aafe1f4 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,49 +4,134 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - ```sh - cd mlos_bench/storage/sql - rm mlos_bench.sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only - ``` + This allows `alembic` to automatically generate a migration script from the current schema. + + > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. + > \ + > In the remainder of this document we should some examples for different DB types. + > Pick the one you're targeting and stick with it thru the example. + > You may need to repeat the process several times to test all of them. + > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. + + For instance: + + 1. Start a temporary server either as a local file or in a docker instance + + ```sh + # sqlite + cd mlos_bench/storage/sql + rm -f mlos_bench.sqlite + ``` + + ```sh + # mysql + docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest + ``` - > This allows `alembic` to automatically generate a migration script from the current schema. + ```sh + # postgres + docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest + ``` -1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. + + ```ini + # Uncomment one of these. + # See README.md for details. + + #sqlalchemy.url = sqlite:///mlos_bench.sqlite + sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench + #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench + ``` + + 1. Prime the DB schema + + ```sh + # sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # mysql + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + +2. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. -1. Generate a new migration script with the following command: +3. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "Descriptive text about the change." + alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` -1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. +4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. -1. Verify that the migration script works by running the following command: +5. Verify that the migration script works by running the following command: ```sh + # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` + ```sh + # mysql: + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres: + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh + # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` -1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + ```sh + # For mysql: + mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" + ``` + + ```sh + # For postgres: + PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" + ``` + + > Use different CLI clients for targeting other engines. + +6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -1. Merge that to the `main` branch. +7. Cleanup any server instances you started. + + For instance: + + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` + + ```sh + docker kill mysql-alembic + ``` + +8. Merge that to the `main` branch. -1. Might be good to cut a new `mlos_bench` release at this point as well. +9. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py index 8a6c36e6bb..1cbdaa9c6a 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py @@ -14,6 +14,7 @@ # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" +# TODO: Test other engines as well. def test_storage_schemas(storage: SqlStorage) -> None: """Test storage schema creation.""" From a2c32562fd879d241a5d1a6335492c7d933358d3 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:34:31 -0500 Subject: [PATCH 07/49] refactoring storage tests to check other db engines --- mlos_bench/mlos_bench/storage/sql/schema.py | 25 ++++ mlos_bench/mlos_bench/storage/sql/storage.py | 27 ++++ mlos_bench/mlos_bench/tests/__init__.py | 24 +++ .../test_load_scheduler_config_examples.py | 4 +- mlos_bench/mlos_bench/tests/conftest.py | 1 + .../tests/services/remote/ssh/README.md | 2 +- .../tests/services/remote/ssh/__init__.py | 19 +-- .../tests/services/remote/ssh/fixtures.py | 30 ++-- .../remote/ssh/test_ssh_host_service.py | 3 +- .../mlos_bench/tests/storage/conftest.py | 4 +- .../mlos_bench/tests/storage/sql/README.md | 27 ++++ .../mlos_bench/tests/storage/sql/__init__.py | 72 +++++++++ .../tests/storage/sql/docker-compose.yml | 40 +++++ .../mlos_bench/tests/storage/sql/down.sh | 18 +++ .../mlos_bench/tests/storage/sql/fixtures.py | 138 +++++++++++++++++- .../storage/{ => sql}/test_storage_schemas.py | 24 ++- mlos_bench/mlos_bench/tests/storage/sql/up.sh | 87 +++++++++++ .../tests/storage/test_storage_pickling.py | 28 +++- 18 files changed, 524 insertions(+), 49 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/README.md create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/down.sh rename mlos_bench/mlos_bench/tests/storage/{ => sql}/test_storage_schemas.py (68%) create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/up.sh diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 3cbb63bcc4..f0071bd757 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -315,6 +315,31 @@ def _get_alembic_cfg(conn: Connection) -> config.Config: alembic_cfg.attributes["connection"] = conn return alembic_cfg + def drop_all_tables(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + self.meta.reflect(bind=self._engine) + if force: + self.meta.drop_all(bind=self._engine) + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 6d98dc97fd..7fc86aff1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -82,6 +82,33 @@ def _schema(self) -> DbSchema: _LOG.debug("DDL statements:\n%s", self._db_schema) return self._db_schema + def _reset_schema(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + if force: + self._schema.drop_all_tables(force=force) + self._db_schema = DbSchema(self._engine) + self._schema_created = False + self._schema_updated = False + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def update_schema(self) -> None: """Update the database schema.""" if not self._schema_updated: diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index ce5fbdb45a..2d9112e52d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -86,6 +86,30 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) +HOST_DOCKER_NAME = "host.docker.internal" + + +@pytest.fixture(scope="session") +def docker_hostname() -> str: + """Returns the local hostname to use to connect to the test ssh server.""" + if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): + # On Linux, if we're running in a docker container, we can use the + # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. + return HOST_DOCKER_NAME + # Docker (Desktop) for Windows (WSL2) uses a special networking magic + # to refer to the host machine as `localhost` when exposing ports. + # In all other cases, assume we're executing directly inside conda on the host. + return "localhost" + + +def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: + """Wait until a docker service is ready.""" + docker_services.wait_until_responsive( + check=lambda: check_socket(hostname, port), + timeout=30.0, + pause=0.5, + ) + def check_socket(host: str, port: int, timeout: float = 1.0) -> bool: """ diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index ad8f9248ac..7d19c7a2c1 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -14,7 +14,7 @@ from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.util import get_class_from_name @@ -58,7 +58,7 @@ def test_load_scheduler_config_examples( config_path: str, mock_env_config_path: str, trial_runners: list[TrialRunner], - storage: SqlStorage, + storage: Storage, mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index becae20503..8e0aba5c00 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -90,6 +90,7 @@ def docker_compose_file(pytestconfig: pytest.Config) -> list[str]: _ = pytestconfig # unused return [ os.path.join(os.path.dirname(__file__), "services", "remote", "ssh", "docker-compose.yml"), + os.path.join(os.path.dirname(__file__), "storage", "sql", "docker-compose.yml"), # Add additional configs as necessary here. ] diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md index 4fe4216ff3..827372d58c 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md @@ -22,7 +22,7 @@ These are brought up as session fixtures under a unique (PID based) compose proj In the case of `pytest`, since the `SshService` base class implements a shared connection cache that we wish to test, and testing "rebooting" of servers (containers) is also necessary, but we want to avoid single threaded execution for tests, we start a third container only for testing reboots. -Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portal synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. ## See Also diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index a6244e3a7a..877d39a26a 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -7,9 +7,6 @@ from dataclasses import dataclass from subprocess import run -from pytest_docker.plugin import Services as DockerServices - -from mlos_bench.tests import check_socket # The SSH test server port and name. # See Also: docker-compose.yml @@ -21,7 +18,12 @@ @dataclass class SshTestServerInfo: - """A data class for SshTestServerInfo.""" + """A data class for SshTestServerInfo. + + See Also + -------- + mlos_bench.tests.storage.sql.SqlTestServerInfo + """ compose_project_name: str service_name: str @@ -70,12 +72,3 @@ def to_connect_params(self, uncached: bool = False) -> dict: "port": self.get_port(uncached), "username": self.username, } - - -def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: - """Wait until a docker service is ready.""" - docker_services.wait_until_responsive( - check=lambda: check_socket(hostname, port), - timeout=30.0, - pause=0.5, - ) diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 79938784a7..23a4a9ed2f 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -19,36 +19,20 @@ from mlos_bench.services.remote.ssh.ssh_fileshare import SshFileShareService from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService -from mlos_bench.tests import resolve_host_name +from mlos_bench.tests import wait_docker_service_socket from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) # pylint: disable=redefined-outer-name -HOST_DOCKER_NAME = "host.docker.internal" - - -@pytest.fixture(scope="session") -def ssh_test_server_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - @pytest.fixture(scope="session") def ssh_test_server( - ssh_test_server_hostname: str, + docker_hostname: str, docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> Generator[SshTestServerInfo]: @@ -66,12 +50,14 @@ def ssh_test_server( ssh_test_server_info = SshTestServerInfo( compose_project_name=docker_compose_project_name, service_name=SSH_TEST_SERVER_NAME, - hostname=ssh_test_server_hostname, + hostname=docker_hostname, username="root", id_rsa_path=id_rsa_file.name, ) wait_docker_service_socket( - locked_docker_services, ssh_test_server_info.hostname, ssh_test_server_info.get_port() + locked_docker_services, + ssh_test_server_info.hostname, + ssh_test_server_info.get_port(), ) id_rsa_src = f"/{ssh_test_server_info.username}/.ssh/id_rsa" docker_cp_cmd = ( @@ -116,7 +102,9 @@ def alt_test_server( id_rsa_path=ssh_test_server.id_rsa_path, ) wait_docker_service_socket( - locked_docker_services, alt_test_server_info.hostname, alt_test_server_info.get_port() + locked_docker_services, + alt_test_server_info.hostname, + alt_test_server_info.get_port(), ) return alt_test_server_info diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 003a8e6433..30fdc804f2 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -12,13 +12,12 @@ from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService from mlos_bench.services.remote.ssh.ssh_service import SshClient -from mlos_bench.tests import requires_docker +from mlos_bench.tests import requires_docker, wait_docker_service_socket, from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) _LOG = logging.getLogger(__name__) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index c510793fac..526ea945f2 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -11,8 +11,10 @@ # same. # Expose some of those as local names so they can be picked up as fixtures by pytest. -storage = sql_storage_fixtures.storage +mysql_storage = sql_storage_fixtures.mysql_storage +postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage +storage = sql_storage_fixtures.storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/README.md b/mlos_bench/mlos_bench/tests/storage/sql/README.md new file mode 100644 index 0000000000..c9b124eab3 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/README.md @@ -0,0 +1,27 @@ +# Sql Storage Tests + +The "unit" tests for the `SqlStorage` classes are more functional than other unit tests in that we don't merely mock them out, but actually setup small SQL databases with `docker compose` and interact with them using the `SqlStorage` class. + +To do this, we make use of the `pytest-docker` plugin to bring up the services defined in the [`docker-compose.yml`](./docker-compose.yml) file in this directory. + +There are currently two services defined in that config, though others could be added in the future: + +1. `mysql-mlos-bench-server` +1. `postgres-mlos-bench-server` + +We rely on `docker compose` to map their internal container service ports to random ports on the host. +Hence, when connecting, we need to look up these ports on demand using something akin to `docker compose port`. +Because of complexities of networking in different development environments (especially for Docker on WSL2 for Windows), we may also have to connect to a different host address than `localhost` (e.g., `host.docker.internal`, which is dynamically requested as a part of of the [devcontainer](../../../../../../.devcontainer/docker-compose.yml) setup). + +These containers are brought up as session fixtures under a unique (PID based) compose project name for each `pytest` invocation, but only when docker is detected on the host (via the `@docker_required` decorator we define in [`mlos_bench/tests/__init__.py`](../../../__init__.py)), else those tests are skipped. + +> For manual testing, to bring up/down the test infrastructure the [`up.sh`](./up.sh) and [`down.sh`](./down.sh) scripts can be used, which assigns a known project name. + +In the case of `pytest`, we also want to be able to test with a fresh state in most cases, so we use the `pytest` `yield` pattern to allow schema cleanup code to happen after the end of each test (see: `_create_storage_from_test_server_info`). +We use lockfiles to prevent races between tests that would otherwise try to create or cleanup the same database schema at the same time. + +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. + +## See Also + +Notes in the [`mlos_bench/tests/services/remote/ssh/README.md`](../../../services/remote/ssh/README.md) file for a similar setup for SSH services. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py index d17a448b5e..442e531a43 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py @@ -3,3 +3,75 @@ # Licensed under the MIT License. # """Tests for mlos_bench sql storage.""" + +from dataclasses import dataclass +from subprocess import run + + +# The DB servers' names and other connection info. +# See Also: docker-compose.yml + +MYSQL_TEST_SERVER_NAME = "mysql-mlos-bench-server" +PGSQL_TEST_SERVER_NAME = "postgres-mlos-bench-server" + +SQL_TEST_SERVER_DATABASE = "mlos_bench" +SQL_TEST_SERVER_PASSWORD = "password" + + +@dataclass +class SqlTestServerInfo: + """A data class for SqlTestServerInfo. + + See Also + -------- + mlos_bench.tests.services.remote.ssh.SshTestServerInfo + """ + + compose_project_name: str + service_name: str + hostname: str + _port: int | None = None + + @property + def username(self) -> str: + """Gets the username.""" + usernames = { + MYSQL_TEST_SERVER_NAME: "root", + PGSQL_TEST_SERVER_NAME: "postgres", + } + return usernames[self.service_name] + + @property + def password(self) -> str: + """Gets the password.""" + return SQL_TEST_SERVER_PASSWORD + + @property + def database(self) -> str: + """Gets the database.""" + return SQL_TEST_SERVER_DATABASE + + def get_port(self, uncached: bool = False) -> int: + """ + Gets the port that the SSH test server is listening on. + + Note: this value can change when the service restarts so we can't rely on + the DockerServices. + """ + if self._port is None or uncached: + default_ports = { + MYSQL_TEST_SERVER_NAME: 3306, + PGSQL_TEST_SERVER_NAME: 5432, + } + default_port = default_ports[self.service_name] + port_cmd = run( + ( + f"docker compose -p {self.compose_project_name} " + f"port {self.service_name} {default_port}" + ), + shell=True, + check=True, + capture_output=True, + ) + self._port = int(port_cmd.stdout.decode().strip().split(":")[1]) + return self._port diff --git a/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml new file mode 100644 index 0000000000..0bfd0bce81 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml @@ -0,0 +1,40 @@ +name: mlos_bench-test-sql-storage +services: + mysql-mlos-bench-server: + hostname: mysql-mlos-bench-server + attach: false + image: docker.io/library/mysql:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-3306} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - MYSQL_ROOT_PASSWORD=password + - MYSQL_DATABASE=mlos_bench + healthcheck: + test: ["CMD-SHELL", "mysqladmin --host localhost --protocol tcp --password=$${MYSQL_ROOT_PASSWORD} ping"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s + postgres-mlos-bench-server: + hostname: postgres-mlos-bench-server + attach: false + image: docker.io/library/postgres:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-5432} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - POSTGRES_PASSWORD=password + - POSTGRES_DB=mlos_bench + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB}"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s diff --git a/mlos_bench/mlos_bench/tests/storage/sql/down.sh b/mlos_bench/mlos_bench/tests/storage/sql/down.sh new file mode 100755 index 0000000000..3d6068cecf --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/down.sh @@ -0,0 +1,18 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to stop the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" + +docker compose -p "$PROJECT_NAME" down diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 0bebeeff82..50376b8727 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -8,9 +8,12 @@ import os import tempfile from collections.abc import Generator +from importlib.resources import files from random import seed as rand_seed +from filelock import FileLock import pytest +from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -19,16 +22,149 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED +from mlos_bench.util import path_join +from mlos_bench.tests import SEED, wait_docker_service_socket from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, TRIAL_RUNNER_COUNT, ) +from mlos_bench.tests.storage.sql import ( + SqlTestServerInfo, + MYSQL_TEST_SERVER_NAME, + PGSQL_TEST_SERVER_NAME, +) from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name +# TODO: Add mysql_storage and postgres_storage + + +@pytest.fixture(scope="session") +def mysql_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting mysql storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=MYSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +@pytest.fixture(scope="session") +def postgres_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting postgres storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=PGSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +def _create_storage_from_test_server_info( + config_file: str, + test_server_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Creates a SqlStorage instance from the given test server info. + + Notes + ----- + Resets the schema as a cleanup operation on return from the function scope + fixture so each test gets a fresh storage instance. + Uses a file lock to ensure that only one test can access the storage at a time. + + Yields + ------ + SqlStorage + """ + sql_storage_name = test_server_info.service_name + with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): + global_config = { + "host": test_server_info.username, + "port": test_server_info.get_port() or 0, + "database": test_server_info.database, + "username": test_server_info.username, + "password": test_server_info.password, + "lazy_schema_create": True, + } + storage = from_config( + config_file, + global_configs=[json.dumps(global_config)], + ) + assert isinstance(storage, SqlStorage) + yield storage + # Cleanup the storage on return + storage._reset_schema(force=True) # pylint: disable=protected-access + + +@pytest.fixture(scope="function") +def mysql_storage( + mysql_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "mysql.jsonc"), + mysql_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + + +@pytest.fixture(scope="function") +def postgres_storage( + postgres_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "postgresql.jsonc"), + postgres_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + @pytest.fixture def sqlite_storage() -> Generator[SqlStorage]: diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py similarity index 68% rename from mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py rename to mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 1cbdaa9c6a..660f4e245d 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -4,21 +4,37 @@ # """Test sql schemas for mlos_bench storage.""" +import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture + from alembic.migration import MigrationContext from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests import DOCKER + # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" -# TODO: Test other engines as well. - -def test_storage_schemas(storage: SqlStorage) -> None: +# Try to test multiple DBMS engines. +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] +@pytest.mark.parameterize( + ["some_sql_storage_fixture"], [ + lazy_fixture("sql_storage"), + *docker_dbms_fixtures, + ] +) +def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: """Test storage schema creation.""" - eng = storage._engine # pylint: disable=protected-access + eng = some_sql_storage_fixture._engine # pylint: disable=protected-access with eng.connect() as conn: # pylint: disable=protected-access inspector = inspect(conn) # Make sure the "trial_runner_id" column exists. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/up.sh b/mlos_bench/mlos_bench/tests/storage/sql/up.sh new file mode 100755 index 0000000000..fd99d1770d --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/up.sh @@ -0,0 +1,87 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to start the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu +set -x + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" +CONTAINER_COUNT=2 + +if ! type mysqladmin >/dev/null 2>&1; then + echo "ERROR: Missing mysqladmin tool to check status of the server." >&2 + exit 1 +fi + +if ! type psql >/dev/null 2>&1; then + echo "ERROR: Missing psql tool to check status of the server." >&2 + exit 1 +fi + + +docker compose -p "$PROJECT_NAME" up --build --remove-orphans -d +set +x + +while ! docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do + echo "Waiting for $CONTAINER_COUNT containers to report healthy ..." + sleep 1 +done + +mysql_port=$(docker compose -p "$PROJECT_NAME" port mysql-mlos-bench-server ${PORT:-3306} | cut -d: -f2) +echo "Connect to the mysql server container at the following port: $mysql_port" +postgres_port=$(docker compose -p "$PROJECT_NAME" port postgres-mlos-bench-server ${PORT:-5432} | cut -d: -f2) +echo "Connect to the postgres server container at the following port: $postgres_port" + +# TODO: Remove the rest of this: + +mysql_ok=0 +if ! type mysqladmin >/dev/null 2>&1; then + echo "WARNING: Missing mysqladmin tool to check status of the server." >&2 + mysql_ok=1 +fi + +pg_ok=0 +if ! type psql >/dev/null 2>&1; then + echo "WARNING: Missing psql tool to check status of the server." >&2 + pg_ok=1 +fi + +for i in {1..10}; do + if [ "$mysql_ok" == "1" ]; then + break + fi + if mysqladmin -h localhost --port $mysql_port --protocol tcp --password=password ping >/dev/null; then + mysql_ok=1 + else + sleep 1 + fi +done +if [ "$mysql_ok" != 1 ]; then + echo "ERR: MySQL failed to start." >&2 + exit 1 +fi + +for i in {1..10}; do + if [ "$pg_ok" == "1" ]; then + break + fi + if PGPASSWORD=password psql -h localhost -p $postgres_port -U postgres mlos_bench -c "SELECT 1;" > /dev/null; then + pg_ok=1 + break + else + sleep 1 + fi +done +if [ "$pg_ok" != 1 ]; then + echo "ERR: Postgres failed to start." >&2 + exit 1 +fi diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 7871e7f68c..1aa29b5efe 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -9,11 +9,20 @@ from typing import Literal import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from pytz import UTC from mlos_bench.environments.status import Status -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.tests import DOCKER + +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] # TODO: When we introduce ParallelTrialScheduler warn at config startup time @@ -22,14 +31,25 @@ sys.platform == "win32", reason="Windows doesn't support multiple processes accessing the same file.", ) +@pytest.mark.parametrize( + ["persistent_storage"], + [ + # TODO: Improve this test to support non-sql backends eventually as well. + lazy_fixture("sqlite_storage"), + *docker_dbms_fixtures, + ], +) def test_storage_pickle_restore_experiment_and_trial( - sqlite_storage: SqlStorage, + persistent_storage: Storage, tunable_groups: TunableGroups, ) -> None: """Check that we can pickle and unpickle the Storage object, and restore Experiment and Trial by id. """ - storage = sqlite_storage + storage = persistent_storage + storage_class = storage.__class__ + assert issubclass(storage_class, Storage) + assert storage_class != Storage # Create an Experiment and a Trial opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} experiment = storage.experiment( @@ -49,7 +69,7 @@ def test_storage_pickle_restore_experiment_and_trial( # Pickle and unpickle the Storage object pickled = pickle.dumps(storage) restored_storage = pickle.loads(pickled) - assert isinstance(restored_storage, SqlStorage) + assert isinstance(restored_storage, storage_class) # Restore the Experiment from storage by id and check that it matches the original restored_experiment = restored_storage.get_experiment_by_id( From 5d3f0d3c66d9e827caf61649cbc34c436d397042 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:31 -0500 Subject: [PATCH 08/49] change column lengths for mysql --- mlos_bench/mlos_bench/storage/sql/schema.py | 56 ++++++++++++--------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f0071bd757..facc57b818 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -73,12 +73,6 @@ class DbSchema: # for all DB tables, so it's ok to disable the warnings. # pylint: disable=too-many-instance-attributes - # Common string column sizes. - _ID_LEN = 512 - _PARAM_VALUE_LEN = 1024 - _METRIC_VALUE_LEN = 255 - _STATUS_LEN = 16 - def __init__(self, engine: Engine | None): """ Declare the SQLAlchemy schema for the database. @@ -96,10 +90,24 @@ def __init__(self, engine: Engine | None): self._engine = engine self._meta = MetaData() + # Common string column sizes. + self._exp_id_len = 512 + self._param_id_len = 512 + self._param_value_len = 1024 + self._metric_id_len = 512 + self._metric_value_len = 255 + self._status_len = 16 + + # Some overrides for certain DB engines: + if engine and engine.dialect.name in {"mysql", "mariadb"}: + self._exp_id_len = 255 + self._param_id_len = 255 + self._metric_id_len = 255 + self.experiment = Table( "experiment", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("description", String(1024)), Column("root_env_config", String(1024), nullable=False), Column("git_repo", String(1024), nullable=False), @@ -109,7 +117,7 @@ def __init__(self, engine: Engine | None): Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. - Column("status", String(self._STATUS_LEN)), + Column("status", String(self._status_len)), # There may be more than one mlos_benchd_service running on different hosts. # This column stores the host/container name of the driver that # picked up the experiment. @@ -127,7 +135,7 @@ def __init__(self, engine: Engine | None): "objectives", self._meta, Column("exp_id"), - Column("optimization_target", String(self._ID_LEN), nullable=False), + Column("optimization_target", String(self._metric_id_len), nullable=False), Column("optimization_direction", String(4), nullable=False), # TODO: Note: weight is not fully supported yet as currently # multi-objective is expected to explore each objective equally. @@ -176,7 +184,7 @@ def __init__(self, engine: Engine | None): self.trial = Table( "trial", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), @@ -191,7 +199,7 @@ def __init__(self, engine: Engine | None): nullable=True, ), # Should match the text IDs of `mlos_bench.environments.Status` enum: - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), @@ -206,8 +214,8 @@ def __init__(self, engine: Engine | None): "config_param", self._meta, Column("config_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) @@ -221,10 +229,10 @@ def __init__(self, engine: Engine | None): self.trial_param = Table( "trial_param", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "param_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -239,7 +247,7 @@ def __init__(self, engine: Engine | None): self.trial_status = Table( "trial_status", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column( "ts", @@ -247,7 +255,7 @@ def __init__(self, engine: Engine | None): nullable=False, default="now", ), - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -261,10 +269,10 @@ def __init__(self, engine: Engine | None): self.trial_result = Table( "trial_result", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._metric_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -279,7 +287,7 @@ def __init__(self, engine: Engine | None): self.trial_telemetry = Table( "trial_telemetry", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column( "ts", @@ -287,8 +295,8 @@ def __init__(self, engine: Engine | None): nullable=False, default="now", ), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._metric_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], From 01b9df4101799ee0ad6c2a3c5d27b38d78c81fa2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:41 -0500 Subject: [PATCH 09/49] more refactor of storage tests --- mlos_bench/mlos_bench/tests/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 2d9112e52d..63be24b311 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -11,9 +11,11 @@ import os import shutil import socket +import sys from datetime import tzinfo from logging import debug, warning from subprocess import run +from pytest_docker.plugin import Services as DockerServices import pytest import pytz From d5781765fa15691d7b2f60a3700468baf0186ecd Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 11:18:46 -0500 Subject: [PATCH 10/49] preparing to support testing multiple backend engines for schema changes --- .../mlos_bench/config/storage/mysql.jsonc | 2 +- .../config/storage/postgresql.jsonc | 2 +- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 + .../mlos_bench/storage/sql/alembic/README.md | 115 +++++++++++++++--- .../tests/storage/test_storage_schemas.py | 1 + 5 files changed, 106 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/config/storage/mysql.jsonc b/mlos_bench/mlos_bench/config/storage/mysql.jsonc index c133b379ba..15986de4c2 100644 --- a/mlos_bench/mlos_bench/config/storage/mysql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/mysql.jsonc @@ -6,7 +6,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "mysql+mysqlconnector", - "database": "osat", + "database": "mlos_bench", "username": "root", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc index cd1214835c..1cff2d76b7 100644 --- a/mlos_bench/mlos_bench/config/storage/postgresql.jsonc +++ b/mlos_bench/mlos_bench/config/storage/postgresql.jsonc @@ -8,7 +8,7 @@ "log_sql": false, // Write all SQL statements to the log. // Parameters below must match kwargs of `sqlalchemy.URL.create()`: "drivername": "postgresql+psycopg2", - "database": "osat", + "database": "mlos_bench", "username": "postgres", "password": "PLACERHOLDER PASSWORD", // Comes from global config "host": "localhost", diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 4d2a1120c5..7f88d809ee 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,7 +63,10 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. +# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite +#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ec35eb70f6..9a9aafe1f4 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,49 +4,134 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - ```sh - cd mlos_bench/storage/sql - rm mlos_bench.sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only - ``` + This allows `alembic` to automatically generate a migration script from the current schema. + + > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. + > \ + > In the remainder of this document we should some examples for different DB types. + > Pick the one you're targeting and stick with it thru the example. + > You may need to repeat the process several times to test all of them. + > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. + + For instance: + + 1. Start a temporary server either as a local file or in a docker instance + + ```sh + # sqlite + cd mlos_bench/storage/sql + rm -f mlos_bench.sqlite + ``` + + ```sh + # mysql + docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest + ``` - > This allows `alembic` to automatically generate a migration script from the current schema. + ```sh + # postgres + docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest + ``` -1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. + + ```ini + # Uncomment one of these. + # See README.md for details. + + #sqlalchemy.url = sqlite:///mlos_bench.sqlite + sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench + #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench + ``` + + 1. Prime the DB schema + + ```sh + # sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # mysql + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + +2. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. -1. Generate a new migration script with the following command: +3. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "Descriptive text about the change." + alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` -1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. +4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. -1. Verify that the migration script works by running the following command: +5. Verify that the migration script works by running the following command: ```sh + # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` + ```sh + # mysql: + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres: + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh + # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` -1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. + ```sh + # For mysql: + mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" + ``` + + ```sh + # For postgres: + PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" + ``` + + > Use different CLI clients for targeting other engines. + +6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -1. Merge that to the `main` branch. +7. Cleanup any server instances you started. + + For instance: + + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` + + ```sh + docker kill mysql-alembic + ``` + +8. Merge that to the `main` branch. -1. Might be good to cut a new `mlos_bench` release at this point as well. +9. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py index 8a6c36e6bb..1cbdaa9c6a 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py @@ -14,6 +14,7 @@ # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" +# TODO: Test other engines as well. def test_storage_schemas(storage: SqlStorage) -> None: """Test storage schema creation.""" From 3b3bf6ce50ff23e43bd0bf62c26f2410877ea458 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:34:31 -0500 Subject: [PATCH 11/49] refactoring storage tests to check other db engines --- mlos_bench/mlos_bench/storage/sql/schema.py | 25 ++++ mlos_bench/mlos_bench/storage/sql/storage.py | 27 ++++ mlos_bench/mlos_bench/tests/__init__.py | 24 +++ .../test_load_scheduler_config_examples.py | 4 +- mlos_bench/mlos_bench/tests/conftest.py | 1 + .../tests/services/remote/ssh/README.md | 2 +- .../tests/services/remote/ssh/__init__.py | 19 +-- .../tests/services/remote/ssh/fixtures.py | 30 ++-- .../remote/ssh/test_ssh_host_service.py | 3 +- .../mlos_bench/tests/storage/conftest.py | 4 +- .../mlos_bench/tests/storage/sql/README.md | 27 ++++ .../mlos_bench/tests/storage/sql/__init__.py | 72 +++++++++ .../tests/storage/sql/docker-compose.yml | 40 +++++ .../mlos_bench/tests/storage/sql/down.sh | 18 +++ .../mlos_bench/tests/storage/sql/fixtures.py | 138 +++++++++++++++++- .../storage/{ => sql}/test_storage_schemas.py | 24 ++- mlos_bench/mlos_bench/tests/storage/sql/up.sh | 87 +++++++++++ .../tests/storage/test_storage_pickling.py | 28 +++- 18 files changed, 524 insertions(+), 49 deletions(-) create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/README.md create mode 100644 mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/down.sh rename mlos_bench/mlos_bench/tests/storage/{ => sql}/test_storage_schemas.py (68%) create mode 100755 mlos_bench/mlos_bench/tests/storage/sql/up.sh diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 2bc00f0082..c6a7849880 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -296,6 +296,31 @@ def _get_alembic_cfg(conn: Connection) -> config.Config: alembic_cfg.attributes["connection"] = conn return alembic_cfg + def drop_all_tables(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + self.meta.reflect(bind=self._engine) + if force: + self.meta.drop_all(bind=self._engine) + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def create(self) -> "DbSchema": """Create the DB schema.""" _LOG.info("Create the DB schema") diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 6d98dc97fd..7fc86aff1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -82,6 +82,33 @@ def _schema(self) -> DbSchema: _LOG.debug("DDL statements:\n%s", self._db_schema) return self._db_schema + def _reset_schema(self, *, force: bool = False) -> None: + """ + Helper method used in testing to reset the DB schema. + + Notes + ----- + This method is not intended for production use, as it will drop all tables + in the database. Use with caution. + + Parameters + ---------- + force : bool + If True, drop all tables in the target database. + If False, this method will not drop any tables and will log a warning. + """ + assert self._engine + if force: + self._schema.drop_all_tables(force=force) + self._db_schema = DbSchema(self._engine) + self._schema_created = False + self._schema_updated = False + else: + _LOG.warning( + "Resetting the schema without force is not implemented. " + "Use force=True to drop all tables." + ) + def update_schema(self) -> None: """Update the database schema.""" if not self._schema_updated: diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index ce5fbdb45a..2d9112e52d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -86,6 +86,30 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) +HOST_DOCKER_NAME = "host.docker.internal" + + +@pytest.fixture(scope="session") +def docker_hostname() -> str: + """Returns the local hostname to use to connect to the test ssh server.""" + if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): + # On Linux, if we're running in a docker container, we can use the + # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. + return HOST_DOCKER_NAME + # Docker (Desktop) for Windows (WSL2) uses a special networking magic + # to refer to the host machine as `localhost` when exposing ports. + # In all other cases, assume we're executing directly inside conda on the host. + return "localhost" + + +def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: + """Wait until a docker service is ready.""" + docker_services.wait_until_responsive( + check=lambda: check_socket(hostname, port), + timeout=30.0, + pause=0.5, + ) + def check_socket(host: str, port: int, timeout: float = 1.0) -> bool: """ diff --git a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py index ad8f9248ac..7d19c7a2c1 100644 --- a/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py +++ b/mlos_bench/mlos_bench/tests/config/schedulers/test_load_scheduler_config_examples.py @@ -14,7 +14,7 @@ from mlos_bench.schedulers.base_scheduler import Scheduler from mlos_bench.schedulers.trial_runner import TrialRunner from mlos_bench.services.config_persistence import ConfigPersistenceService -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tests.config import BUILTIN_TEST_CONFIG_PATH, locate_config_examples from mlos_bench.util import get_class_from_name @@ -58,7 +58,7 @@ def test_load_scheduler_config_examples( config_path: str, mock_env_config_path: str, trial_runners: list[TrialRunner], - storage: SqlStorage, + storage: Storage, mock_opt: MockOptimizer, ) -> None: """Tests loading a config example.""" diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index becae20503..8e0aba5c00 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -90,6 +90,7 @@ def docker_compose_file(pytestconfig: pytest.Config) -> list[str]: _ = pytestconfig # unused return [ os.path.join(os.path.dirname(__file__), "services", "remote", "ssh", "docker-compose.yml"), + os.path.join(os.path.dirname(__file__), "storage", "sql", "docker-compose.yml"), # Add additional configs as necessary here. ] diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md index 4fe4216ff3..827372d58c 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/README.md @@ -22,7 +22,7 @@ These are brought up as session fixtures under a unique (PID based) compose proj In the case of `pytest`, since the `SshService` base class implements a shared connection cache that we wish to test, and testing "rebooting" of servers (containers) is also necessary, but we want to avoid single threaded execution for tests, we start a third container only for testing reboots. -Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portal synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. ## See Also diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index a6244e3a7a..877d39a26a 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -7,9 +7,6 @@ from dataclasses import dataclass from subprocess import run -from pytest_docker.plugin import Services as DockerServices - -from mlos_bench.tests import check_socket # The SSH test server port and name. # See Also: docker-compose.yml @@ -21,7 +18,12 @@ @dataclass class SshTestServerInfo: - """A data class for SshTestServerInfo.""" + """A data class for SshTestServerInfo. + + See Also + -------- + mlos_bench.tests.storage.sql.SqlTestServerInfo + """ compose_project_name: str service_name: str @@ -70,12 +72,3 @@ def to_connect_params(self, uncached: bool = False) -> dict: "port": self.get_port(uncached), "username": self.username, } - - -def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: - """Wait until a docker service is ready.""" - docker_services.wait_until_responsive( - check=lambda: check_socket(hostname, port), - timeout=30.0, - pause=0.5, - ) diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 79938784a7..23a4a9ed2f 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -19,36 +19,20 @@ from mlos_bench.services.remote.ssh.ssh_fileshare import SshFileShareService from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService -from mlos_bench.tests import resolve_host_name +from mlos_bench.tests import wait_docker_service_socket from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) # pylint: disable=redefined-outer-name -HOST_DOCKER_NAME = "host.docker.internal" - - -@pytest.fixture(scope="session") -def ssh_test_server_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - @pytest.fixture(scope="session") def ssh_test_server( - ssh_test_server_hostname: str, + docker_hostname: str, docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> Generator[SshTestServerInfo]: @@ -66,12 +50,14 @@ def ssh_test_server( ssh_test_server_info = SshTestServerInfo( compose_project_name=docker_compose_project_name, service_name=SSH_TEST_SERVER_NAME, - hostname=ssh_test_server_hostname, + hostname=docker_hostname, username="root", id_rsa_path=id_rsa_file.name, ) wait_docker_service_socket( - locked_docker_services, ssh_test_server_info.hostname, ssh_test_server_info.get_port() + locked_docker_services, + ssh_test_server_info.hostname, + ssh_test_server_info.get_port(), ) id_rsa_src = f"/{ssh_test_server_info.username}/.ssh/id_rsa" docker_cp_cmd = ( @@ -116,7 +102,9 @@ def alt_test_server( id_rsa_path=ssh_test_server.id_rsa_path, ) wait_docker_service_socket( - locked_docker_services, alt_test_server_info.hostname, alt_test_server_info.get_port() + locked_docker_services, + alt_test_server_info.hostname, + alt_test_server_info.get_port(), ) return alt_test_server_info diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 003a8e6433..30fdc804f2 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -12,13 +12,12 @@ from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService from mlos_bench.services.remote.ssh.ssh_service import SshClient -from mlos_bench.tests import requires_docker +from mlos_bench.tests import requires_docker, wait_docker_service_socket, from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, SSH_TEST_SERVER_NAME, SshTestServerInfo, - wait_docker_service_socket, ) _LOG = logging.getLogger(__name__) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index c510793fac..526ea945f2 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -11,8 +11,10 @@ # same. # Expose some of those as local names so they can be picked up as fixtures by pytest. -storage = sql_storage_fixtures.storage +mysql_storage = sql_storage_fixtures.mysql_storage +postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage +storage = sql_storage_fixtures.storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage mixed_numerics_exp_storage = sql_storage_fixtures.mixed_numerics_exp_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/README.md b/mlos_bench/mlos_bench/tests/storage/sql/README.md new file mode 100644 index 0000000000..c9b124eab3 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/README.md @@ -0,0 +1,27 @@ +# Sql Storage Tests + +The "unit" tests for the `SqlStorage` classes are more functional than other unit tests in that we don't merely mock them out, but actually setup small SQL databases with `docker compose` and interact with them using the `SqlStorage` class. + +To do this, we make use of the `pytest-docker` plugin to bring up the services defined in the [`docker-compose.yml`](./docker-compose.yml) file in this directory. + +There are currently two services defined in that config, though others could be added in the future: + +1. `mysql-mlos-bench-server` +1. `postgres-mlos-bench-server` + +We rely on `docker compose` to map their internal container service ports to random ports on the host. +Hence, when connecting, we need to look up these ports on demand using something akin to `docker compose port`. +Because of complexities of networking in different development environments (especially for Docker on WSL2 for Windows), we may also have to connect to a different host address than `localhost` (e.g., `host.docker.internal`, which is dynamically requested as a part of of the [devcontainer](../../../../../../.devcontainer/docker-compose.yml) setup). + +These containers are brought up as session fixtures under a unique (PID based) compose project name for each `pytest` invocation, but only when docker is detected on the host (via the `@docker_required` decorator we define in [`mlos_bench/tests/__init__.py`](../../../__init__.py)), else those tests are skipped. + +> For manual testing, to bring up/down the test infrastructure the [`up.sh`](./up.sh) and [`down.sh`](./down.sh) scripts can be used, which assigns a known project name. + +In the case of `pytest`, we also want to be able to test with a fresh state in most cases, so we use the `pytest` `yield` pattern to allow schema cleanup code to happen after the end of each test (see: `_create_storage_from_test_server_info`). +We use lockfiles to prevent races between tests that would otherwise try to create or cleanup the same database schema at the same time. + +Additionally, since `scope="session"` fixtures are executed once per worker, which is excessive in our case, we use lockfiles (one of the only portable synchronization methods) to ensure that the usual `docker_services` fixture which starts and stops the containers is only executed once per test run and uses a shared compose instance. + +## See Also + +Notes in the [`mlos_bench/tests/services/remote/ssh/README.md`](../../../services/remote/ssh/README.md) file for a similar setup for SSH services. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py index d17a448b5e..442e531a43 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py @@ -3,3 +3,75 @@ # Licensed under the MIT License. # """Tests for mlos_bench sql storage.""" + +from dataclasses import dataclass +from subprocess import run + + +# The DB servers' names and other connection info. +# See Also: docker-compose.yml + +MYSQL_TEST_SERVER_NAME = "mysql-mlos-bench-server" +PGSQL_TEST_SERVER_NAME = "postgres-mlos-bench-server" + +SQL_TEST_SERVER_DATABASE = "mlos_bench" +SQL_TEST_SERVER_PASSWORD = "password" + + +@dataclass +class SqlTestServerInfo: + """A data class for SqlTestServerInfo. + + See Also + -------- + mlos_bench.tests.services.remote.ssh.SshTestServerInfo + """ + + compose_project_name: str + service_name: str + hostname: str + _port: int | None = None + + @property + def username(self) -> str: + """Gets the username.""" + usernames = { + MYSQL_TEST_SERVER_NAME: "root", + PGSQL_TEST_SERVER_NAME: "postgres", + } + return usernames[self.service_name] + + @property + def password(self) -> str: + """Gets the password.""" + return SQL_TEST_SERVER_PASSWORD + + @property + def database(self) -> str: + """Gets the database.""" + return SQL_TEST_SERVER_DATABASE + + def get_port(self, uncached: bool = False) -> int: + """ + Gets the port that the SSH test server is listening on. + + Note: this value can change when the service restarts so we can't rely on + the DockerServices. + """ + if self._port is None or uncached: + default_ports = { + MYSQL_TEST_SERVER_NAME: 3306, + PGSQL_TEST_SERVER_NAME: 5432, + } + default_port = default_ports[self.service_name] + port_cmd = run( + ( + f"docker compose -p {self.compose_project_name} " + f"port {self.service_name} {default_port}" + ), + shell=True, + check=True, + capture_output=True, + ) + self._port = int(port_cmd.stdout.decode().strip().split(":")[1]) + return self._port diff --git a/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml new file mode 100644 index 0000000000..0bfd0bce81 --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/docker-compose.yml @@ -0,0 +1,40 @@ +name: mlos_bench-test-sql-storage +services: + mysql-mlos-bench-server: + hostname: mysql-mlos-bench-server + attach: false + image: docker.io/library/mysql:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-3306} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - MYSQL_ROOT_PASSWORD=password + - MYSQL_DATABASE=mlos_bench + healthcheck: + test: ["CMD-SHELL", "mysqladmin --host localhost --protocol tcp --password=$${MYSQL_ROOT_PASSWORD} ping"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s + postgres-mlos-bench-server: + hostname: postgres-mlos-bench-server + attach: false + image: docker.io/library/postgres:latest + ports: + # To allow multiple instances of this to coexist, instead of explicitly + # mapping the port, let it get assigned randomly on the host. + - ${PORT:-5432} + extra_hosts: + - host.docker.internal:host-gateway + environment: + - POSTGRES_PASSWORD=password + - POSTGRES_DB=mlos_bench + healthcheck: + test: ["CMD-SHELL", "pg_isready -d $${POSTGRES_DB}"] + interval: 10s + timeout: 30s + retries: 5 + start_period: 5s diff --git a/mlos_bench/mlos_bench/tests/storage/sql/down.sh b/mlos_bench/mlos_bench/tests/storage/sql/down.sh new file mode 100755 index 0000000000..3d6068cecf --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/down.sh @@ -0,0 +1,18 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to stop the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" + +docker compose -p "$PROJECT_NAME" down diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 0bebeeff82..50376b8727 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -8,9 +8,12 @@ import os import tempfile from collections.abc import Generator +from importlib.resources import files from random import seed as rand_seed +from filelock import FileLock import pytest +from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -19,16 +22,149 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED +from mlos_bench.util import path_join +from mlos_bench.tests import SEED, wait_docker_service_socket from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, TRIAL_RUNNER_COUNT, ) +from mlos_bench.tests.storage.sql import ( + SqlTestServerInfo, + MYSQL_TEST_SERVER_NAME, + PGSQL_TEST_SERVER_NAME, +) from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name +# TODO: Add mysql_storage and postgres_storage + + +@pytest.fixture(scope="session") +def mysql_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting mysql storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=MYSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +@pytest.fixture(scope="session") +def postgres_storage_info( + docker_hostname: str, + docker_compose_project_name: str, + locked_docker_services: DockerServices, +) -> SqlTestServerInfo: + """ + Fixture for getting postgres storage connection info. + """ + storage_info = SqlTestServerInfo( + compose_project_name=docker_compose_project_name, + service_name=PGSQL_TEST_SERVER_NAME, + hostname=docker_hostname, + ) + wait_docker_service_socket( + locked_docker_services, + storage_info.hostname, + storage_info.get_port(), + ) + return storage_info + + +def _create_storage_from_test_server_info( + config_file: str, + test_server_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Creates a SqlStorage instance from the given test server info. + + Notes + ----- + Resets the schema as a cleanup operation on return from the function scope + fixture so each test gets a fresh storage instance. + Uses a file lock to ensure that only one test can access the storage at a time. + + Yields + ------ + SqlStorage + """ + sql_storage_name = test_server_info.service_name + with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): + global_config = { + "host": test_server_info.username, + "port": test_server_info.get_port() or 0, + "database": test_server_info.database, + "username": test_server_info.username, + "password": test_server_info.password, + "lazy_schema_create": True, + } + storage = from_config( + config_file, + global_configs=[json.dumps(global_config)], + ) + assert isinstance(storage, SqlStorage) + yield storage + # Cleanup the storage on return + storage._reset_schema(force=True) # pylint: disable=protected-access + + +@pytest.fixture(scope="function") +def mysql_storage( + mysql_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "mysql.jsonc"), + mysql_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + + +@pytest.fixture(scope="function") +def postgres_storage( + postgres_storage_info: SqlTestServerInfo, + shared_temp_dir: str, + short_testrun_uid: str, +) -> Generator[SqlStorage]: + """ + Fixture of a MySQL backed SqlStorage engine. + + See Also + -------- + _create_storage_from_test_server_info + """ + return _create_storage_from_test_server_info( + path_join(str(files("mlos_bench.config")), "storage", "postgresql.jsonc"), + postgres_storage_info, + shared_temp_dir, + short_testrun_uid, + ) + @pytest.fixture def sqlite_storage() -> Generator[SqlStorage]: diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py similarity index 68% rename from mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py rename to mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 1cbdaa9c6a..660f4e245d 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -4,21 +4,37 @@ # """Test sql schemas for mlos_bench storage.""" +import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture + from alembic.migration import MigrationContext from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests import DOCKER + # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. # See Also: schema.py for an example of programmatic alembic config access. CURRENT_ALEMBIC_HEAD = "8928a401115b" -# TODO: Test other engines as well. - -def test_storage_schemas(storage: SqlStorage) -> None: +# Try to test multiple DBMS engines. +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] +@pytest.mark.parameterize( + ["some_sql_storage_fixture"], [ + lazy_fixture("sql_storage"), + *docker_dbms_fixtures, + ] +) +def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: """Test storage schema creation.""" - eng = storage._engine # pylint: disable=protected-access + eng = some_sql_storage_fixture._engine # pylint: disable=protected-access with eng.connect() as conn: # pylint: disable=protected-access inspector = inspect(conn) # Make sure the "trial_runner_id" column exists. diff --git a/mlos_bench/mlos_bench/tests/storage/sql/up.sh b/mlos_bench/mlos_bench/tests/storage/sql/up.sh new file mode 100755 index 0000000000..fd99d1770d --- /dev/null +++ b/mlos_bench/mlos_bench/tests/storage/sql/up.sh @@ -0,0 +1,87 @@ +#!/bin/bash +## +## Copyright (c) Microsoft Corporation. +## Licensed under the MIT License. +## + +# A script to start the containerized SQL DBMS servers. +# For pytest, the fixture in conftest.py will handle this for us using the +# pytest-docker plugin, but for manual testing, this script can be used. + +set -eu +set -x + +scriptdir=$(dirname "$(readlink -f "$0")") +cd "$scriptdir" + +PROJECT_NAME="mlos_bench-test-sql-storage-manual" +CONTAINER_COUNT=2 + +if ! type mysqladmin >/dev/null 2>&1; then + echo "ERROR: Missing mysqladmin tool to check status of the server." >&2 + exit 1 +fi + +if ! type psql >/dev/null 2>&1; then + echo "ERROR: Missing psql tool to check status of the server." >&2 + exit 1 +fi + + +docker compose -p "$PROJECT_NAME" up --build --remove-orphans -d +set +x + +while ! docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do + echo "Waiting for $CONTAINER_COUNT containers to report healthy ..." + sleep 1 +done + +mysql_port=$(docker compose -p "$PROJECT_NAME" port mysql-mlos-bench-server ${PORT:-3306} | cut -d: -f2) +echo "Connect to the mysql server container at the following port: $mysql_port" +postgres_port=$(docker compose -p "$PROJECT_NAME" port postgres-mlos-bench-server ${PORT:-5432} | cut -d: -f2) +echo "Connect to the postgres server container at the following port: $postgres_port" + +# TODO: Remove the rest of this: + +mysql_ok=0 +if ! type mysqladmin >/dev/null 2>&1; then + echo "WARNING: Missing mysqladmin tool to check status of the server." >&2 + mysql_ok=1 +fi + +pg_ok=0 +if ! type psql >/dev/null 2>&1; then + echo "WARNING: Missing psql tool to check status of the server." >&2 + pg_ok=1 +fi + +for i in {1..10}; do + if [ "$mysql_ok" == "1" ]; then + break + fi + if mysqladmin -h localhost --port $mysql_port --protocol tcp --password=password ping >/dev/null; then + mysql_ok=1 + else + sleep 1 + fi +done +if [ "$mysql_ok" != 1 ]; then + echo "ERR: MySQL failed to start." >&2 + exit 1 +fi + +for i in {1..10}; do + if [ "$pg_ok" == "1" ]; then + break + fi + if PGPASSWORD=password psql -h localhost -p $postgres_port -U postgres mlos_bench -c "SELECT 1;" > /dev/null; then + pg_ok=1 + break + else + sleep 1 + fi +done +if [ "$pg_ok" != 1 ]; then + echo "ERR: Postgres failed to start." >&2 + exit 1 +fi diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 7871e7f68c..1aa29b5efe 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -9,11 +9,20 @@ from typing import Literal import pytest +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from pytz import UTC from mlos_bench.environments.status import Status -from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.storage.base_storage import Storage from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.tests import DOCKER + +docker_dbms_fixtures = [] +if DOCKER: + docker_dbms_fixtures = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] # TODO: When we introduce ParallelTrialScheduler warn at config startup time @@ -22,14 +31,25 @@ sys.platform == "win32", reason="Windows doesn't support multiple processes accessing the same file.", ) +@pytest.mark.parametrize( + ["persistent_storage"], + [ + # TODO: Improve this test to support non-sql backends eventually as well. + lazy_fixture("sqlite_storage"), + *docker_dbms_fixtures, + ], +) def test_storage_pickle_restore_experiment_and_trial( - sqlite_storage: SqlStorage, + persistent_storage: Storage, tunable_groups: TunableGroups, ) -> None: """Check that we can pickle and unpickle the Storage object, and restore Experiment and Trial by id. """ - storage = sqlite_storage + storage = persistent_storage + storage_class = storage.__class__ + assert issubclass(storage_class, Storage) + assert storage_class != Storage # Create an Experiment and a Trial opt_targets: dict[str, Literal["min", "max"]] = {"metric": "min"} experiment = storage.experiment( @@ -49,7 +69,7 @@ def test_storage_pickle_restore_experiment_and_trial( # Pickle and unpickle the Storage object pickled = pickle.dumps(storage) restored_storage = pickle.loads(pickled) - assert isinstance(restored_storage, SqlStorage) + assert isinstance(restored_storage, storage_class) # Restore the Experiment from storage by id and check that it matches the original restored_experiment = restored_storage.get_experiment_by_id( From 1c9633c61c5da559e35d66e6dc8dd6c3016a739c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:35:31 -0500 Subject: [PATCH 12/49] change column lengths for mysql --- mlos_bench/mlos_bench/storage/sql/schema.py | 56 ++++++++++++--------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index c6a7849880..48d06f63c5 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -72,12 +72,6 @@ class DbSchema: # for all DB tables, so it's ok to disable the warnings. # pylint: disable=too-many-instance-attributes - # Common string column sizes. - _ID_LEN = 512 - _PARAM_VALUE_LEN = 1024 - _METRIC_VALUE_LEN = 255 - _STATUS_LEN = 16 - def __init__(self, engine: Engine | None): """ Declare the SQLAlchemy schema for the database. @@ -95,10 +89,24 @@ def __init__(self, engine: Engine | None): self._engine = engine self._meta = MetaData() + # Common string column sizes. + self._exp_id_len = 512 + self._param_id_len = 512 + self._param_value_len = 1024 + self._metric_id_len = 512 + self._metric_value_len = 255 + self._status_len = 16 + + # Some overrides for certain DB engines: + if engine and engine.dialect.name in {"mysql", "mariadb"}: + self._exp_id_len = 255 + self._param_id_len = 255 + self._metric_id_len = 255 + self.experiment = Table( "experiment", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("description", String(1024)), Column("root_env_config", String(1024), nullable=False), Column("git_repo", String(1024), nullable=False), @@ -108,7 +116,7 @@ def __init__(self, engine: Engine | None): Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. - Column("status", String(self._STATUS_LEN)), + Column("status", String(self._status_len)), # There may be more than one mlos_benchd_service running on different hosts. # This column stores the host/container name of the driver that # picked up the experiment. @@ -126,7 +134,7 @@ def __init__(self, engine: Engine | None): "objectives", self._meta, Column("exp_id"), - Column("optimization_target", String(self._ID_LEN), nullable=False), + Column("optimization_target", String(self._metric_id_len), nullable=False), Column("optimization_direction", String(4), nullable=False), # TODO: Note: weight is not fully supported yet as currently # multi-objective is expected to explore each objective equally. @@ -175,14 +183,14 @@ def __init__(self, engine: Engine | None): self.trial = Table( "trial", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), Column("ts_start", DateTime, nullable=False), Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), ForeignKeyConstraint(["exp_id"], [self.experiment.c.exp_id]), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), @@ -197,8 +205,8 @@ def __init__(self, engine: Engine | None): "config_param", self._meta, Column("config_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("config_id", "param_id"), ForeignKeyConstraint(["config_id"], [self.config.c.config_id]), ) @@ -212,10 +220,10 @@ def __init__(self, engine: Engine | None): self.trial_param = Table( "trial_param", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("param_id", String(self._ID_LEN), nullable=False), - Column("param_value", String(self._PARAM_VALUE_LEN)), + Column("param_id", String(self._param_id_len), nullable=False), + Column("param_value", String(self._param_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "param_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -230,10 +238,10 @@ def __init__(self, engine: Engine | None): self.trial_status = Table( "trial_status", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), - Column("status", String(self._STATUS_LEN), nullable=False), + Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -247,10 +255,10 @@ def __init__(self, engine: Engine | None): self.trial_result = Table( "trial_result", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._metric_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), PrimaryKeyConstraint("exp_id", "trial_id", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], @@ -265,11 +273,11 @@ def __init__(self, engine: Engine | None): self.trial_telemetry = Table( "trial_telemetry", self._meta, - Column("exp_id", String(self._ID_LEN), nullable=False), + Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), - Column("metric_id", String(self._ID_LEN), nullable=False), - Column("metric_value", String(self._METRIC_VALUE_LEN)), + Column("metric_id", String(self._exp_id_len), nullable=False), + Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), ForeignKeyConstraint( ["exp_id", "trial_id"], From f5b7bf364d52dc60d672c361611a460e139ec492 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 16:43:48 -0500 Subject: [PATCH 13/49] fixups --- mlos_bench/mlos_bench/tests/__init__.py | 16 +--------------- mlos_bench/mlos_bench/tests/conftest.py | 18 +++++++++++++++++- .../tests/services/remote/ssh/conftest.py | 1 - .../tests/services/remote/ssh/fixtures.py | 1 - .../remote/ssh/test_ssh_host_service.py | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 2d9112e52d..a39337648d 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -16,6 +16,7 @@ from subprocess import run import pytest +from pytest_docker.plugin import Services as DockerServices import pytz from mlos_bench.util import get_class_from_name, nullable @@ -86,21 +87,6 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) -HOST_DOCKER_NAME = "host.docker.internal" - - -@pytest.fixture(scope="session") -def docker_hostname() -> str: - """Returns the local hostname to use to connect to the test ssh server.""" - if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): - # On Linux, if we're running in a docker container, we can use the - # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. - return HOST_DOCKER_NAME - # Docker (Desktop) for Windows (WSL2) uses a special networking magic - # to refer to the host machine as `localhost` when exposing ports. - # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" - def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 8e0aba5c00..8d48d2094c 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -15,7 +15,7 @@ from pytest_docker.plugin import get_docker_services from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.tests import SEED, tunable_groups_fixtures +from mlos_bench.tests import SEED, tunable_groups_fixtures, resolve_host_name from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name @@ -29,6 +29,22 @@ covariant_group = tunable_groups_fixtures.covariant_group +HOST_DOCKER_NAME = "host.docker.internal" + + +@pytest.fixture(scope="session") +def docker_hostname() -> str: + """Returns the local hostname to use to connect to the test ssh server.""" + if sys.platform != "win32" and resolve_host_name(HOST_DOCKER_NAME): + # On Linux, if we're running in a docker container, we can use the + # --add-host (extra_hosts in docker-compose.yml) to refer to the host IP. + return HOST_DOCKER_NAME + # Docker (Desktop) for Windows (WSL2) uses a special networking magic + # to refer to the host machine as `localhost` when exposing ports. + # In all other cases, assume we're executing directly inside conda on the host. + return "localhost" + + @pytest.fixture def mock_env(tunable_groups: TunableGroups) -> MockEnv: """Test fixture for MockEnv.""" diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py index 34006985af..83e98c44a2 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/conftest.py @@ -7,7 +7,6 @@ import mlos_bench.tests.services.remote.ssh.fixtures as ssh_fixtures # Expose some of those as local names so they can be picked up as fixtures by pytest. -ssh_test_server_hostname = ssh_fixtures.ssh_test_server_hostname ssh_test_server = ssh_fixtures.ssh_test_server alt_test_server = ssh_fixtures.alt_test_server reboot_test_server = ssh_fixtures.reboot_test_server diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py index 23a4a9ed2f..fda84e9f79 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/fixtures.py @@ -9,7 +9,6 @@ """ import os -import sys import tempfile from collections.abc import Generator from subprocess import run diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py index 30fdc804f2..1dce67a13d 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/test_ssh_host_service.py @@ -12,7 +12,7 @@ from mlos_bench.services.remote.ssh.ssh_host_service import SshHostService from mlos_bench.services.remote.ssh.ssh_service import SshClient -from mlos_bench.tests import requires_docker, wait_docker_service_socket, +from mlos_bench.tests import requires_docker, wait_docker_service_socket from mlos_bench.tests.services.remote.ssh import ( ALT_TEST_SERVER_NAME, REBOOT_TEST_SERVER_NAME, From 7fdf06dead9b182f02f454c63378161243fe3d53 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:46:07 -0500 Subject: [PATCH 14/49] fixup --- mlos_bench/mlos_bench/tests/__init__.py | 30 +++++++++++++++++++ .../mlos_bench/tests/storage/conftest.py | 2 ++ .../mlos_bench/tests/storage/sql/fixtures.py | 29 ++++++++++-------- .../tests/storage/sql/test_storage_schemas.py | 15 ++++++---- 4 files changed, 59 insertions(+), 17 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index a39337648d..98324c6ebb 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -8,6 +8,7 @@ Used to make mypy happy about multiple conftest.py modules. """ import filecmp +import json import os import shutil import socket @@ -88,6 +89,35 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: return full_class_name == try_resolve_class_name(expected_class_name) +def is_docker_service_healthy( + compose_project_name: str, + service_name: str, +) -> bool: + """Check if a docker service is healthy.""" + docker_ps_out = run( + f"docker compose -p {compose_project_name} " f"ps --format json {service_name}", + shell=True, + check=True, + capture_output=True, + ) + docker_ps_json = json.loads(docker_ps_out.stdout.decode().strip()) + return docker_ps_json["State"] == "running" and docker_ps_json["Health"] == "healthy" + + +def wait_docker_service_healthy( + docker_services: DockerServices, + project_name: str, + service_name: str, + timeout: float = 30.0, +) -> None: + """Wait until a docker service is healthy.""" + docker_services.wait_until_responsive( + check=lambda: is_docker_service_healthy(project_name, service_name), + timeout=timeout, + pause=0.5, + ) + + def wait_docker_service_socket(docker_services: DockerServices, hostname: str, port: int) -> None: """Wait until a docker service is ready.""" docker_services.wait_until_responsive( diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index 526ea945f2..acdbe5024a 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -11,7 +11,9 @@ # same. # Expose some of those as local names so they can be picked up as fixtures by pytest. +mysql_storage_info = sql_storage_fixtures.mysql_storage_info mysql_storage = sql_storage_fixtures.mysql_storage +postgres_storage_info = sql_storage_fixtures.postgres_storage_info postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage storage = sql_storage_fixtures.storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 50376b8727..eb3c1c3eca 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,6 +4,7 @@ # """Test fixtures for mlos_bench storage.""" +from contextlib import contextmanager import json import os import tempfile @@ -23,7 +24,7 @@ from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config from mlos_bench.util import path_join -from mlos_bench.tests import SEED, wait_docker_service_socket +from mlos_bench.tests import SEED, wait_docker_service_healthy from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, @@ -55,11 +56,12 @@ def mysql_storage_info( service_name=MYSQL_TEST_SERVER_NAME, hostname=docker_hostname, ) - wait_docker_service_socket( + wait_docker_service_healthy( locked_docker_services, - storage_info.hostname, - storage_info.get_port(), + storage_info.compose_project_name, + storage_info.service_name, ) + return storage_info @@ -77,14 +79,15 @@ def postgres_storage_info( service_name=PGSQL_TEST_SERVER_NAME, hostname=docker_hostname, ) - wait_docker_service_socket( + wait_docker_service_healthy( locked_docker_services, - storage_info.hostname, - storage_info.get_port(), + storage_info.compose_project_name, + storage_info.service_name, ) return storage_info +@contextmanager def _create_storage_from_test_server_info( config_file: str, test_server_info: SqlTestServerInfo, @@ -107,7 +110,7 @@ def _create_storage_from_test_server_info( sql_storage_name = test_server_info.service_name with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): global_config = { - "host": test_server_info.username, + "host": test_server_info.hostname, "port": test_server_info.get_port() or 0, "database": test_server_info.database, "username": test_server_info.username, @@ -137,12 +140,13 @@ def mysql_storage( -------- _create_storage_from_test_server_info """ - return _create_storage_from_test_server_info( + with _create_storage_from_test_server_info( path_join(str(files("mlos_bench.config")), "storage", "mysql.jsonc"), mysql_storage_info, shared_temp_dir, short_testrun_uid, - ) + ) as storage: + yield storage @pytest.fixture(scope="function") @@ -158,12 +162,13 @@ def postgres_storage( -------- _create_storage_from_test_server_info """ - return _create_storage_from_test_server_info( + with _create_storage_from_test_server_info( path_join(str(files("mlos_bench.config")), "storage", "postgresql.jsonc"), postgres_storage_info, shared_temp_dir, short_testrun_uid, - ) + ) as storage: + yield storage @pytest.fixture diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 660f4e245d..569b19984a 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -11,6 +11,7 @@ from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage +from mlos_bench.tests.storage.sql.fixtures import storage as sql_storage from mlos_bench.tests import DOCKER @@ -26,14 +27,18 @@ lazy_fixture("mysql_storage"), lazy_fixture("postgres_storage"), ] -@pytest.mark.parameterize( - ["some_sql_storage_fixture"], [ - lazy_fixture("sql_storage"), - *docker_dbms_fixtures, - ] + + +@pytest.mark.parametrize( + "some_sql_storage_fixture", + [ + lazy_fixture("sqlite_storage"), + *docker_dbms_fixtures, + ], ) def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: """Test storage schema creation.""" + assert isinstance(some_sql_storage_fixture, SqlStorage) eng = some_sql_storage_fixture._engine # pylint: disable=protected-access with eng.connect() as conn: # pylint: disable=protected-access inspector = inspect(conn) From 1bf3c30d733b4d750f6cb289c3cdb495cad67795 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:46:12 -0500 Subject: [PATCH 15/49] fixup --- mlos_bench/mlos_bench/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 8d48d2094c..7885bfcbe4 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -42,7 +42,7 @@ def docker_hostname() -> str: # Docker (Desktop) for Windows (WSL2) uses a special networking magic # to refer to the host machine as `localhost` when exposing ports. # In all other cases, assume we're executing directly inside conda on the host. - return "localhost" + return "127.0.0.1" # "localhost" @pytest.fixture From bd5862f12c7f3be114730f08cb07847094801d22 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:46:17 -0500 Subject: [PATCH 16/49] cleanup --- mlos_bench/mlos_bench/tests/storage/sql/up.sh | 65 +++---------------- 1 file changed, 8 insertions(+), 57 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/up.sh b/mlos_bench/mlos_bench/tests/storage/sql/up.sh index fd99d1770d..36fea613b9 100755 --- a/mlos_bench/mlos_bench/tests/storage/sql/up.sh +++ b/mlos_bench/mlos_bench/tests/storage/sql/up.sh @@ -17,71 +17,22 @@ cd "$scriptdir" PROJECT_NAME="mlos_bench-test-sql-storage-manual" CONTAINER_COUNT=2 -if ! type mysqladmin >/dev/null 2>&1; then - echo "ERROR: Missing mysqladmin tool to check status of the server." >&2 - exit 1 -fi - -if ! type psql >/dev/null 2>&1; then - echo "ERROR: Missing psql tool to check status of the server." >&2 - exit 1 -fi - - docker compose -p "$PROJECT_NAME" up --build --remove-orphans -d set +x -while ! docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do +function get_project_health() { + docker compose -p "$PROJECT_NAME" ps --format '{{.Name}} {{.State}} {{.Health}}' +} + +project_health=$(get_project_health) +while ! echo "$project_health" | grep -c ' running healthy$' | grep -q -x $CONTAINER_COUNT; do echo "Waiting for $CONTAINER_COUNT containers to report healthy ..." + echo "$project_health" sleep 1 + project_health=$(get_project_health) done mysql_port=$(docker compose -p "$PROJECT_NAME" port mysql-mlos-bench-server ${PORT:-3306} | cut -d: -f2) echo "Connect to the mysql server container at the following port: $mysql_port" postgres_port=$(docker compose -p "$PROJECT_NAME" port postgres-mlos-bench-server ${PORT:-5432} | cut -d: -f2) echo "Connect to the postgres server container at the following port: $postgres_port" - -# TODO: Remove the rest of this: - -mysql_ok=0 -if ! type mysqladmin >/dev/null 2>&1; then - echo "WARNING: Missing mysqladmin tool to check status of the server." >&2 - mysql_ok=1 -fi - -pg_ok=0 -if ! type psql >/dev/null 2>&1; then - echo "WARNING: Missing psql tool to check status of the server." >&2 - pg_ok=1 -fi - -for i in {1..10}; do - if [ "$mysql_ok" == "1" ]; then - break - fi - if mysqladmin -h localhost --port $mysql_port --protocol tcp --password=password ping >/dev/null; then - mysql_ok=1 - else - sleep 1 - fi -done -if [ "$mysql_ok" != 1 ]; then - echo "ERR: MySQL failed to start." >&2 - exit 1 -fi - -for i in {1..10}; do - if [ "$pg_ok" == "1" ]; then - break - fi - if PGPASSWORD=password psql -h localhost -p $postgres_port -U postgres mlos_bench -c "SELECT 1;" > /dev/null; then - pg_ok=1 - break - else - sleep 1 - fi -done -if [ "$pg_ok" != 1 ]; then - echo "ERR: Postgres failed to start." >&2 - exit 1 -fi From 67f84ead1d571c8357a659dae0321e489dc45d00 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:51:16 -0500 Subject: [PATCH 17/49] fixup --- mlos_bench/mlos_bench/storage/sql/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 48d06f63c5..28f00a8af3 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -276,7 +276,7 @@ def __init__(self, engine: Engine | None): Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), Column("ts", DateTime(timezone=True), nullable=False, default="now"), - Column("metric_id", String(self._exp_id_len), nullable=False), + Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), ForeignKeyConstraint( From 41199fc5c6125c2d6ce121b6eb78a3e7461ea5e0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 17:54:05 -0500 Subject: [PATCH 18/49] format --- .../mlos_bench/storage/sql/alembic/README.md | 31 ++++++++++--------- mlos_bench/mlos_bench/tests/conftest.py | 2 +- .../tests/services/remote/ssh/__init__.py | 4 +-- .../mlos_bench/tests/storage/sql/__init__.py | 4 +-- .../mlos_bench/tests/storage/sql/fixtures.py | 16 ++++------ .../tests/storage/sql/test_storage_schemas.py | 6 ++-- .../tests/storage/test_storage_pickling.py | 2 +- 7 files changed, 30 insertions(+), 35 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index 9a9aafe1f4..50164ed005 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -13,6 +13,7 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla > In the remainder of this document we should some examples for different DB types. > Pick the one you're targeting and stick with it thru the example. > You may need to repeat the process several times to test all of them. + > > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. For instance: @@ -63,21 +64,21 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password ``` -2. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. +1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. -3. Generate a new migration script with the following command: +1. Generate a new migration script with the following command: ```sh alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` -4. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. +1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. -5. Verify that the migration script works by running the following command: +1. Verify that the migration script works by running the following command: ```sh # sqlite @@ -116,22 +117,22 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla > Use different CLI clients for targeting other engines. -6. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. +1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -7. Cleanup any server instances you started. +1. Cleanup any server instances you started. - For instance: + For instance: - ```sh - rm mlos_bench/storage/sql/mlos_bench.sqlite - ``` + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` - ```sh - docker kill mysql-alembic - ``` + ```sh + docker kill mysql-alembic + ``` -8. Merge that to the `main` branch. +1. Merge that to the `main` branch. -9. Might be good to cut a new `mlos_bench` release at this point as well. +1. Might be good to cut a new `mlos_bench` release at this point as well. diff --git a/mlos_bench/mlos_bench/tests/conftest.py b/mlos_bench/mlos_bench/tests/conftest.py index 7885bfcbe4..59dfbff9d2 100644 --- a/mlos_bench/mlos_bench/tests/conftest.py +++ b/mlos_bench/mlos_bench/tests/conftest.py @@ -15,7 +15,7 @@ from pytest_docker.plugin import get_docker_services from mlos_bench.environments.mock_env import MockEnv -from mlos_bench.tests import SEED, tunable_groups_fixtures, resolve_host_name +from mlos_bench.tests import SEED, resolve_host_name, tunable_groups_fixtures from mlos_bench.tunables.tunable_groups import TunableGroups # pylint: disable=redefined-outer-name diff --git a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py index 877d39a26a..de822fe2eb 100644 --- a/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py +++ b/mlos_bench/mlos_bench/tests/services/remote/ssh/__init__.py @@ -7,7 +7,6 @@ from dataclasses import dataclass from subprocess import run - # The SSH test server port and name. # See Also: docker-compose.yml SSH_TEST_SERVER_PORT = 2254 @@ -18,7 +17,8 @@ @dataclass class SshTestServerInfo: - """A data class for SshTestServerInfo. + """ + A data class for SshTestServerInfo. See Also -------- diff --git a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py index 442e531a43..d1a7c3c800 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/__init__.py @@ -7,7 +7,6 @@ from dataclasses import dataclass from subprocess import run - # The DB servers' names and other connection info. # See Also: docker-compose.yml @@ -20,7 +19,8 @@ @dataclass class SqlTestServerInfo: - """A data class for SqlTestServerInfo. + """ + A data class for SqlTestServerInfo. See Also -------- diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index eb3c1c3eca..d7f058483c 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -4,16 +4,16 @@ # """Test fixtures for mlos_bench storage.""" -from contextlib import contextmanager import json import os import tempfile from collections.abc import Generator +from contextlib import contextmanager from importlib.resources import files from random import seed as rand_seed -from filelock import FileLock import pytest +from filelock import FileLock from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -23,7 +23,6 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.util import path_join from mlos_bench.tests import SEED, wait_docker_service_healthy from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, @@ -31,11 +30,12 @@ TRIAL_RUNNER_COUNT, ) from mlos_bench.tests.storage.sql import ( - SqlTestServerInfo, MYSQL_TEST_SERVER_NAME, PGSQL_TEST_SERVER_NAME, + SqlTestServerInfo, ) from mlos_bench.tunables.tunable_groups import TunableGroups +from mlos_bench.util import path_join # pylint: disable=redefined-outer-name @@ -48,9 +48,7 @@ def mysql_storage_info( docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> SqlTestServerInfo: - """ - Fixture for getting mysql storage connection info. - """ + """Fixture for getting mysql storage connection info.""" storage_info = SqlTestServerInfo( compose_project_name=docker_compose_project_name, service_name=MYSQL_TEST_SERVER_NAME, @@ -71,9 +69,7 @@ def postgres_storage_info( docker_compose_project_name: str, locked_docker_services: DockerServices, ) -> SqlTestServerInfo: - """ - Fixture for getting postgres storage connection info. - """ + """Fixture for getting postgres storage connection info.""" storage_info = SqlTestServerInfo( compose_project_name=docker_compose_project_name, service_name=PGSQL_TEST_SERVER_NAME, diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 569b19984a..32653b474e 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -5,15 +5,13 @@ """Test sql schemas for mlos_bench storage.""" import pytest -from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture - from alembic.migration import MigrationContext +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage -from mlos_bench.tests.storage.sql.fixtures import storage as sql_storage - from mlos_bench.tests import DOCKER +from mlos_bench.tests.storage.sql.fixtures import storage as sql_storage # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 1aa29b5efe..bb54ea66c2 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -14,8 +14,8 @@ from mlos_bench.environments.status import Status from mlos_bench.storage.base_storage import Storage -from mlos_bench.tunables.tunable_groups import TunableGroups from mlos_bench.tests import DOCKER +from mlos_bench.tunables.tunable_groups import TunableGroups docker_dbms_fixtures = [] if DOCKER: From ee4a900b410fe99060776fb776cfcd37fb683b1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 28 May 2025 23:04:12 +0000 Subject: [PATCH 19/49] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 98324c6ebb..fecb883971 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -17,8 +17,8 @@ from subprocess import run import pytest -from pytest_docker.plugin import Services as DockerServices import pytz +from pytest_docker.plugin import Services as DockerServices from mlos_bench.util import get_class_from_name, nullable From 82220ecc4cd9f05b9368cc912b46808ddcea0293 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 18:08:10 -0500 Subject: [PATCH 20/49] fixup --- mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index bb54ea66c2..365c68af93 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -32,7 +32,7 @@ reason="Windows doesn't support multiple processes accessing the same file.", ) @pytest.mark.parametrize( - ["persistent_storage"], + "persistent_storage", [ # TODO: Improve this test to support non-sql backends eventually as well. lazy_fixture("sqlite_storage"), From aa2621b4ae1e059c0a08f3cadb26c83b83e91118 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Wed, 28 May 2025 18:10:26 -0500 Subject: [PATCH 21/49] fixup --- mlos_bench/mlos_bench/tests/environments/remote/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/environments/remote/conftest.py b/mlos_bench/mlos_bench/tests/environments/remote/conftest.py index 257e37fa9e..b8ea5a2a6b 100644 --- a/mlos_bench/mlos_bench/tests/environments/remote/conftest.py +++ b/mlos_bench/mlos_bench/tests/environments/remote/conftest.py @@ -8,4 +8,3 @@ # Expose some of those as local names so they can be picked up as fixtures by pytest. ssh_test_server = ssh_fixtures.ssh_test_server -ssh_test_server_hostname = ssh_fixtures.ssh_test_server_hostname From a84d30bab434a0f360cb81880889973245fb9c96 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:28:12 -0500 Subject: [PATCH 22/49] switch to interprocesslock - already using that --- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index d7f058483c..a582da5dff 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -13,7 +13,7 @@ from random import seed as rand_seed import pytest -from filelock import FileLock +from fasteners import InterProcessLock from pytest_docker.plugin import Services as DockerServices from mlos_bench.optimizers.mock_optimizer import MockOptimizer @@ -104,7 +104,9 @@ def _create_storage_from_test_server_info( SqlStorage """ sql_storage_name = test_server_info.service_name - with FileLock(path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock")): + with InterProcessLock( + path_join(shared_temp_dir, f"{sql_storage_name}-{short_testrun_uid}.lock") + ): global_config = { "host": test_server_info.hostname, "port": test_server_info.get_port() or 0, From 2341a10fecfede15ea2f0f7faa9b4cc8334cee24 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:31:52 -0500 Subject: [PATCH 23/49] address a lint issue --- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index a582da5dff..572b1a4c0d 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -120,9 +120,11 @@ def _create_storage_from_test_server_info( global_configs=[json.dumps(global_config)], ) assert isinstance(storage, SqlStorage) - yield storage - # Cleanup the storage on return - storage._reset_schema(force=True) # pylint: disable=protected-access + try: + yield storage + finally: + # Cleanup the storage on return + storage._reset_schema(force=True) # pylint: disable=protected-access @pytest.fixture(scope="function") From 45ad11ffb8f3eed0b3a46987d7897bb732b9c5f8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:33:33 -0500 Subject: [PATCH 24/49] restore the original alembic comments - moving to separate PR --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 - .../mlos_bench/storage/sql/alembic/README.md | 104 ++---------------- 2 files changed, 9 insertions(+), 98 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 7f88d809ee..4d2a1120c5 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,10 +63,7 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. -# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite -#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench -#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index 50164ed005..ec35eb70f6 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,67 +4,17 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - This allows `alembic` to automatically generate a migration script from the current schema. - - > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. - > \ - > In the remainder of this document we should some examples for different DB types. - > Pick the one you're targeting and stick with it thru the example. - > You may need to repeat the process several times to test all of them. - > - > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. - - For instance: - - 1. Start a temporary server either as a local file or in a docker instance - - ```sh - # sqlite - cd mlos_bench/storage/sql - rm -f mlos_bench.sqlite - ``` - - ```sh - # mysql - docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest - ``` - - ```sh - # postgres - docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest - ``` - - 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. - - ```ini - # Uncomment one of these. - # See README.md for details. - - #sqlalchemy.url = sqlite:///mlos_bench.sqlite - sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench - #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench - ``` - - 1. Prime the DB schema - - ```sh - # sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password - ``` - - ```sh - # mysql - mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password - ``` + ```sh + cd mlos_bench/storage/sql + rm mlos_bench.sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only + ``` - ```sh - # postgres - mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password - ``` + > This allows `alembic` to automatically generate a migration script from the current schema. -1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. +1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. @@ -73,7 +23,7 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." + alembic revision --autogenerate -m "Descriptive text about the change." ``` 1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. @@ -81,58 +31,22 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Verify that the migration script works by running the following command: ```sh - # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` - ```sh - # mysql: - mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password - ``` - - ```sh - # postgres: - mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password - ``` - > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh - # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` - ```sh - # For mysql: - mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" - ``` - - ```sh - # For postgres: - PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" - ``` - - > Use different CLI clients for targeting other engines. - 1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. -1. Cleanup any server instances you started. - - For instance: - - ```sh - rm mlos_bench/storage/sql/mlos_bench.sqlite - ``` - - ```sh - docker kill mysql-alembic - ``` - 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From bbcf689a3ec1d53f5430502e50cc98182010ae36 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:34:00 -0500 Subject: [PATCH 25/49] Revert "restore the original alembic comments - moving to separate PR" This reverts commit 45ad11ffb8f3eed0b3a46987d7897bb732b9c5f8. --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 3 + .../mlos_bench/storage/sql/alembic/README.md | 104 ++++++++++++++++-- 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index f5a249fd7a..6cc1e4b7c8 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -63,7 +63,10 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 # See README.md for details. +# Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite +#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench [post_write_hooks] diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index ec35eb70f6..50164ed005 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -4,17 +4,67 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla ## Overview -1. Create a blank `mlos_bench.sqlite` database file in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: +1. Create a blank database instance in the [`mlos_bench/storage/sql`](../) directory with the current schema using the following command: - ```sh - cd mlos_bench/storage/sql - rm mlos_bench.sqlite - mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only - ``` + This allows `alembic` to automatically generate a migration script from the current schema. + + > NOTE: If your schema changes target a particular backend engine (e.g., using `with_variant`) you will need to use an engine with that config for this step. + > \ + > In the remainder of this document we should some examples for different DB types. + > Pick the one you're targeting and stick with it thru the example. + > You may need to repeat the process several times to test all of them. + > + > - [ ] TODO: Add scripts to automatically do this for several different backend engines all at once. + + For instance: + + 1. Start a temporary server either as a local file or in a docker instance + + ```sh + # sqlite + cd mlos_bench/storage/sql + rm -f mlos_bench.sqlite + ``` + + ```sh + # mysql + docker run -it --rm --name mysql-alembic --env MYSQL_ROOT_PASSWORD=password --env MYSQL_DATABASE=mlos_bench -p 3306:3306 mysql:latest + ``` + + ```sh + # postgres + docker run -it --rm --name postgres-alembic --env POSTGRES_PASSWORD=password --env POSTGRES_DB=mlos_bench -p 5432:5432 postgres:latest + ``` - > This allows `alembic` to automatically generate a migration script from the current schema. + 1. Adjust the `sqlalchemy.url` in the [`alembic.ini`](../alembic.ini) file. -1. Adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + ```ini + # Uncomment one of these. + # See README.md for details. + + #sqlalchemy.url = sqlite:///mlos_bench.sqlite + sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench + #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench + ``` + + 1. Prime the DB schema + + ```sh + # sqlite + mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # mysql + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + +1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. > Keep each change small and atomic. > For example, if you want to add a new column, do that in one change. @@ -23,7 +73,7 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Generate a new migration script with the following command: ```sh - alembic revision --autogenerate -m "Descriptive text about the change." + alembic revision --autogenerate -m "CHANGEME: Descriptive text about the change." ``` 1. Review the generated migration script in the [`mlos_bench/storage/sql/alembic/versions`](./versions/) directory. @@ -31,22 +81,58 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Verify that the migration script works by running the following command: ```sh + # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only ``` + ```sh + # mysql: + mlos_bench --storage storage/mysql.jsonc --create-update-storage-schema-only --password=password + ``` + + ```sh + # postgres: + mlos_bench --storage storage/postgresql.jsonc --create-update-storage-schema-only --password=password + ``` + > Normally this would be done with `alembic upgrade head`, but this command is convenient to ensure if will work with the `mlos_bench` command line interface as well. Examine the results using something like: ```sh + # For sqlite: sqlite3 mlos_bench.sqlite .schema sqlite3 mlos_bench.sqlite "SELECT * FROM alembic_version;" ``` + ```sh + # For mysql: + mysql --user root --password=password --host localhost --protocol tcp --database mlos_bench -e "SHOW TABLES; SELECT * FROM alembic_version;" + ``` + + ```sh + # For postgres: + PGPASSWORD=password psql -h localhost -p 5432 -U postgres mlos_bench -c "SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_catalog='mlos_bench'; SELECT * FROM alembic_version;" + ``` + + > Use different CLI clients for targeting other engines. + 1. If the migration script works, commit the changes to the [`mlos_bench/storage/sql/schema.py`](../schema.py) and [`mlos_bench/storage/sql/alembic/versions`](./versions/) files. > Be sure to update the latest version in the [`test_storage_schemas.py`](../../../tests/storage/test_storage_schemas.py) file as well. +1. Cleanup any server instances you started. + + For instance: + + ```sh + rm mlos_bench/storage/sql/mlos_bench.sqlite + ``` + + ```sh + docker kill mysql-alembic + ``` + 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From 53ee6e288c5fb69452d2e6d62162ef89a14a394c Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:36:29 -0500 Subject: [PATCH 26/49] more comments --- mlos_bench/mlos_bench/storage/sql/alembic/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/README.md b/mlos_bench/mlos_bench/storage/sql/alembic/README.md index 50164ed005..48b0b04aa7 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/README.md +++ b/mlos_bench/mlos_bench/storage/sql/alembic/README.md @@ -49,6 +49,8 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Prime the DB schema + > Note: you may want to `git checkout main` first to make sure you're using the current schema here. + ```sh # sqlite mlos_bench --storage storage/sqlite.jsonc --create-update-storage-schema-only --password=password @@ -66,7 +68,10 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla 1. Now, adjust the [`mlos_bench/storage/sql/schema.py`](../schema.py) file to reflect the new desired schema. + > Don't forget to do this on a new branch. + > \ > Keep each change small and atomic. + > \ > For example, if you want to add a new column, do that in one change. > If you want to rename a column, do that in another change. @@ -133,6 +138,10 @@ This document contains some notes on how to use [`alembic`](https://alembic.sqla docker kill mysql-alembic ``` + ```sh + docker kill postgres-alembic + ``` + 1. Merge that to the `main` branch. 1. Might be good to cut a new `mlos_bench` release at this point as well. From 994a32a2476062d04a1a4e6cf3c388f74fcf54f5 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:40:59 -0500 Subject: [PATCH 27/49] mypy --- mlos_bench/mlos_bench/tests/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index f9bf6d5b7d..1e0d8c9941 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -127,7 +127,11 @@ def is_docker_service_healthy( capture_output=True, ) docker_ps_json = json.loads(docker_ps_out.stdout.decode().strip()) - return docker_ps_json["State"] == "running" and docker_ps_json["Health"] == "healthy" + state = docker_ps_json["State"] + assert isinstance(state, str) + health = docker_ps_json["Health"] + assert isinstance(health, str) + return state == "running" and health == "healthy" def wait_docker_service_healthy( From 2faf6434a796514c958737649a8dbd9273be667d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 10:42:44 -0500 Subject: [PATCH 28/49] pylint From a6b89419358eddb352335d7697077a2197737320 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:13:51 -0500 Subject: [PATCH 29/49] allow retrieving storage url from the environment --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 10 ++++++++-- mlos_bench/mlos_bench/storage/sql/storage.py | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index fc186b8cb1..b74da7c6c6 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,11 +5,12 @@ """Alembic environment script.""" # pylint: disable=no-member +import os import sys from logging.config import fileConfig from alembic import context -from sqlalchemy import engine_from_config, pool +from sqlalchemy import create_engine, engine_from_config, pool from mlos_bench.storage.sql.schema import DbSchema @@ -25,7 +26,12 @@ # add your model's MetaData object here # for 'autogenerate' support -target_metadata = DbSchema(engine=None).meta +# First, try to read it from an environment variable that we set in mlos_bench. +# Then, fallback to the config file (should only be used by alembic cli while +# devs prepare schema changes). +url = os.getenv("MLOS_ALEMBIC_URL") or config.get_main_option("sqlalchemy.url") +engine = create_engine(url) if url else None +target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, # can be acquired: diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 7fc86aff1f..470cc06fa9 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -5,6 +5,7 @@ """Saving and restoring the benchmark data in SQL database.""" import logging +import os from typing import Literal from sqlalchemy import URL, Engine, create_engine @@ -37,6 +38,8 @@ def __init__( self._lazy_schema_create = self._config.pop("lazy_schema_create", False) self._log_sql = self._config.pop("log_sql", False) self._url = URL.create(**self._config) + os.environ["MLOS_ALEMBIC_URL"] = str(self._url) + _LOG.debug("MLOS_ALEMBIC_URL: %s", os.environ["MLOS_ALEMBIC_URL"]) self._repr = f"{self._url.get_backend_name()}:{self._url.database}" self._engine: Engine self._db_schema: DbSchema From 9c5ac14d4fd3cec0a018401fe5ae775a4c004cd6 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:26:21 -0500 Subject: [PATCH 30/49] more alembic tweaks --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 9 ++++----- mlos_bench/mlos_bench/storage/sql/schema.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index b74da7c6c6..407a986120 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,7 +5,6 @@ """Alembic environment script.""" # pylint: disable=no-member -import os import sys from logging.config import fileConfig @@ -26,10 +25,10 @@ # add your model's MetaData object here # for 'autogenerate' support -# First, try to read it from an environment variable that we set in mlos_bench. -# Then, fallback to the config file (should only be used by alembic cli while -# devs prepare schema changes). -url = os.getenv("MLOS_ALEMBIC_URL") or config.get_main_option("sqlalchemy.url") +# NOTE: We override the alembic.ini file programmatically in storage/sql/schema.py +# However, the alembic.ini file value is used during alembic CLI operations +# (e.g., dev ops extending the schema). +url = config.get_main_option("sqlalchemy.url") engine = create_engine(url) if url else None target_metadata = DbSchema(engine=engine).meta diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index facc57b818..902681203c 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -315,11 +315,17 @@ def meta(self) -> MetaData: """Return the SQLAlchemy MetaData object.""" return self._meta - @staticmethod - def _get_alembic_cfg(conn: Connection) -> config.Config: + def _get_alembic_cfg(self, conn: Connection) -> config.Config: alembic_cfg = config.Config( path_join(str(files("mlos_bench.storage.sql")), "alembic.ini", abs_path=True) ) + assert self._engine is not None + alembic_cfg.set_main_option( + "sqlalchemy.url", + self._engine.url.render_as_string( + hide_password=False, + ), + ) alembic_cfg.attributes["connection"] = conn return alembic_cfg From 988bc903b73c8842d164f99323627ad7b8b11fb0 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:27:26 -0500 Subject: [PATCH 31/49] remove env --- mlos_bench/mlos_bench/storage/sql/storage.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 470cc06fa9..7fc86aff1f 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -5,7 +5,6 @@ """Saving and restoring the benchmark data in SQL database.""" import logging -import os from typing import Literal from sqlalchemy import URL, Engine, create_engine @@ -38,8 +37,6 @@ def __init__( self._lazy_schema_create = self._config.pop("lazy_schema_create", False) self._log_sql = self._config.pop("log_sql", False) self._url = URL.create(**self._config) - os.environ["MLOS_ALEMBIC_URL"] = str(self._url) - _LOG.debug("MLOS_ALEMBIC_URL: %s", os.environ["MLOS_ALEMBIC_URL"]) self._repr = f"{self._url.get_backend_name()}:{self._url.database}" self._engine: Engine self._db_schema: DbSchema From f720d0dc18793af0c5577dffbf94bce9cc1752c4 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:36:13 -0500 Subject: [PATCH 32/49] temporarily revert back to something like the original schema --- mlos_bench/mlos_bench/storage/sql/schema.py | 31 ++++----------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 902681203c..f280f819c2 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -113,8 +112,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), - Column("ts_end", DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql")), + Column("ts_start", DateTime), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -188,16 +187,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - ), - Column( - "ts_end", - DateTime().with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=True, - ), + Column("ts_start", DateTime, nullable=False), + Column("ts_end", DateTime, nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -249,12 +240,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -289,12 +275,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From f250e617d1f63ddc20de22024471f0de7a31e5f9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:37:15 -0500 Subject: [PATCH 33/49] Revert "temporarily revert back to something like the original schema" This reverts commit d77a35127c83366f0d53af664371a6ea9798a3f7. --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 ++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f280f819c2..885e9e356f 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -112,8 +113,20 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -187,8 +200,22 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime, nullable=True), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -240,7 +267,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -275,7 +310,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 793c2e516b7eb546eebdb60358b7dcbd30cb6d29 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:45:44 -0500 Subject: [PATCH 34/49] Reapply "temporarily revert back to something like the original schema" This reverts commit f250e617d1f63ddc20de22024471f0de7a31e5f9. --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 +++------------------ 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 885e9e356f..f280f819c2 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -113,20 +112,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), + Column("ts_start", DateTime), + Column("ts_end", DateTime), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -200,22 +187,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=True, - ), + Column("ts_start", DateTime, nullable=False), + Column("ts_end", DateTime, nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -267,15 +240,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -310,15 +275,7 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 952fba02cb1481565b50e9b5a61a9fb83cf527e8 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 11:58:05 -0500 Subject: [PATCH 35/49] include timezone --- mlos_bench/mlos_bench/storage/sql/schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index f280f819c2..aa8132dacb 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -112,8 +112,8 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime), - Column("ts_end", DateTime), + Column("ts_start", DateTime(timezone=True)), + Column("ts_end", DateTime(timezone=True)), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -187,8 +187,8 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime, nullable=False), - Column("ts_end", DateTime, nullable=True), + Column("ts_start", DateTime(timezone=True), nullable=False), + Column("ts_end", DateTime(timezone=True), nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), From d6617bab5cbc2bb8561d9dfe509c9be234dedf8d Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:05:58 -0500 Subject: [PATCH 36/49] make mysql datetimes support fractional seconds --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 ++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index aa8132dacb..885e9e356f 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -112,8 +113,20 @@ def __init__(self, engine: Engine | None): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime(timezone=True)), - Column("ts_end", DateTime(timezone=True)), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -187,8 +200,22 @@ def __init__(self, engine: Engine | None): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime(timezone=True), nullable=False), - Column("ts_end", DateTime(timezone=True), nullable=True), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -240,7 +267,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -275,7 +310,15 @@ def __init__(self, engine: Engine | None): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 2f28d79e61af2139860589fb52b83219fd662bf7 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:07:38 -0500 Subject: [PATCH 37/49] log the alembic target engine url --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 407a986120..842550bc0a 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -7,6 +7,7 @@ import sys from logging.config import fileConfig +from logging import info from alembic import context from sqlalchemy import create_engine, engine_from_config, pool @@ -30,6 +31,8 @@ # (e.g., dev ops extending the schema). url = config.get_main_option("sqlalchemy.url") engine = create_engine(url) if url else None +if engine: + info(f"engine.url {str(engine.url)}") target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, From 1b0fb2cf75a3ba3766b4ef898faecd690cf165b2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:19:36 -0500 Subject: [PATCH 38/49] engine no longer optional --- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 4 +++- mlos_bench/mlos_bench/storage/sql/schema.py | 10 +++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index 842550bc0a..cb384e5910 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -7,7 +7,7 @@ import sys from logging.config import fileConfig -from logging import info +from logging import info, warning from alembic import context from sqlalchemy import create_engine, engine_from_config, pool @@ -33,6 +33,8 @@ engine = create_engine(url) if url else None if engine: info(f"engine.url {str(engine.url)}") +else: + warning("Missing engine.url: schema changes may not be accurate.") target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 885e9e356f..e9b369259b 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -73,19 +73,15 @@ class DbSchema: # for all DB tables, so it's ok to disable the warnings. # pylint: disable=too-many-instance-attributes - def __init__(self, engine: Engine | None): + def __init__(self, engine: Engine): """ Declare the SQLAlchemy schema for the database. Parameters ---------- - engine : sqlalchemy.engine.Engine | None - The SQLAlchemy engine to use for the DB schema. - Listed as optional for `alembic `_ - schema migration purposes so we can reference it inside it's ``env.py`` - config file for :attr:`~meta` data inspection, but won't generally be - functional without one. + engine : sqlalchemy.engine.Engine """ + assert engine, "Error: can't create schema without engine." _LOG.info("Create the DB schema for: %s", engine) self._engine = engine self._meta = MetaData() From 928f4915227521e0e8f98247dd7326ae8a1d2a60 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Thu, 29 May 2025 12:21:34 -0500 Subject: [PATCH 39/49] Revert "make mysql datetimes support fractional seconds" This reverts commit d6617bab5cbc2bb8561d9dfe509c9be234dedf8d. --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 +++------------------ 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index e9b369259b..bb97d61049 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,7 +39,6 @@ create_mock_engine, inspect, ) -from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -109,20 +108,8 @@ def __init__(self, engine: Engine): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - ), + Column("ts_start", DateTime(timezone=True)), + Column("ts_end", DateTime(timezone=True)), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -196,22 +183,8 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column( - "ts_start", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - ), - Column( - "ts_end", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=True, - ), + Column("ts_start", DateTime(timezone=True), nullable=False), + Column("ts_end", DateTime(timezone=True), nullable=True), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -263,15 +236,7 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -306,15 +271,7 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column( - "ts", - DateTime(timezone=True).with_variant( - mysql.DATETIME(fsp=6), - "mysql", - ), - nullable=False, - default="now", - ), + Column("ts", DateTime(timezone=True), nullable=False, default="now"), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From 4863a5b8c390d32862b4182d320be2a8f666ae36 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:31:13 -0500 Subject: [PATCH 40/49] Enable alembic to detect datetime precision issues with MySQL --- .../mlos_bench/storage/sql/alembic/env.py | 142 ++++++++++++++++-- 1 file changed, 133 insertions(+), 9 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index cb384e5910..c56e9cebf3 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -7,10 +7,15 @@ import sys from logging.config import fileConfig -from logging import info, warning +import logging from alembic import context +from alembic.migration import MigrationContext from sqlalchemy import create_engine, engine_from_config, pool +from sqlalchemy.schema import Column as SchemaColumn +from sqlalchemy.sql.schema import Column +from sqlalchemy.types import TypeEngine +from sqlalchemy.dialects import mysql from mlos_bench.storage.sql.schema import DbSchema @@ -23,18 +28,18 @@ # Don't override the mlos_bench or pytest loggers though. if config.config_file_name is not None and "alembic" in sys.argv[0]: fileConfig(config.config_file_name) +alembic_logger = logging.getLogger("alembic") # add your model's MetaData object here # for 'autogenerate' support # NOTE: We override the alembic.ini file programmatically in storage/sql/schema.py # However, the alembic.ini file value is used during alembic CLI operations # (e.g., dev ops extending the schema). -url = config.get_main_option("sqlalchemy.url") -engine = create_engine(url) if url else None -if engine: - info(f"engine.url {str(engine.url)}") -else: - warning("Missing engine.url: schema changes may not be accurate.") +sqlalchemy_url = config.get_main_option("sqlalchemy.url") +if not sqlalchemy_url: + raise ValueError("Missing sqlalchemy.url: schema changes may not be accurate.") +engine = create_engine(sqlalchemy_url) +alembic_logger.info("engine.url %s", str(engine.url)) target_metadata = DbSchema(engine=engine).meta # other values from the config, defined by the needs of env.py, @@ -43,6 +48,116 @@ # ... etc. +def fq_class_name(t: object) -> str: + """Return the fully qualified class name of a type.""" + return t.__module__ + "." + t.__class__.__name__ + + +def custom_compare_types( + migration_context: MigrationContext, # pylint: disable=unused-argument + inspected_column: SchemaColumn | None, # pylint: disable=unused-argument + metadata_column: Column, # pylint: disable=unused-argument + inspected_type: TypeEngine, + metadata_type: TypeEngine, +) -> bool | None: + """Custom column type comparator. + + See `Comparing Types + `_ + documentation for more details. + + Parameters + ---------- + + Notes + ----- + In the case of a MySQL DateTime variant, it makes sure that the floating + point accuracy is met. + + Returns + ------- + result : bool | None + Returns True if the column specifications don't match the column (i.e., + a change is needed). + Returns False when the column specification and column match. + Returns None to fallback to the default comparator logic. + """ + metadata_dialect_type = metadata_type.dialect_impl(migration_context.dialect) + if alembic_logger.isEnabledFor(logging.DEBUG): + alembic_logger.debug( + ( + "Comparing columns: " + "inspected_column: [%s] %s and " + "metadata_column: [%s (%s)] %s " + "inspected_column.__dict__: %s\n" + "inspected_column.dialect_options: %s\n" + "inspected_column.dialect_kwargs: %s\n" + "inspected_type.__dict__: %s\n" + "metadata_column.__dict__: %s\n" + "metadata_type.__dict__: %s\n" + "metadata_dialect_type.__dict__: %s\n" + ), + fq_class_name(inspected_type), + inspected_column, + fq_class_name(metadata_type), + fq_class_name(metadata_dialect_type), + metadata_column, + inspected_column.__dict__, + dict(inspected_column.dialect_options) if inspected_column is not None else None, + dict(inspected_column.dialect_kwargs) if inspected_column is not None else None, + inspected_type.__dict__, + metadata_column.__dict__, + metadata_type.__dict__, + metadata_dialect_type.__dict__, + ) + + # Implement a more detailed DATETIME precision comparison for MySQL. + # Note: Currently also handles MariaDB. + if migration_context.dialect.name == "mysql": + if isinstance(metadata_dialect_type, (mysql.DATETIME, mysql.TIMESTAMP)): + if not isinstance(inspected_type, type(metadata_dialect_type)): + alembic_logger.info( + "inspected_type %s does not match metadata_dialect_type %s", + fq_class_name(inspected_type), + fq_class_name(metadata_dialect_type), + ) + return True + else: + if inspected_type.fsp != metadata_dialect_type.fsp: + alembic_logger.info( + "inspected_type.fsp (%s) and metadata_dialect_type.fsp (%s) don't match", + inspected_type.fsp, + metadata_dialect_type.fsp, + ) + return True + + if inspected_type.timezone != metadata_dialect_type.timezone: + alembic_logger.info( + ( + "inspected_type.timezone (%s) and " + "metadata_dialect_type.timezone (%s) don't match" + ), + inspected_type.timezone, + metadata_dialect_type.timezone, + ) + return True + + if alembic_logger.isEnabledFor(logging.DEBUG): + alembic_logger.debug( + ( + "Using default compare_type behavior for " + "inspected_column: [%s] %s and " + "metadata_column: [%s (%s)] %s (see above for details).\n" + ), + fq_class_name(inspected_type), + inspected_column, + fq_class_name(metadata_type), + fq_class_name(metadata_dialect_type), + metadata_column, + ) + return None # fallback to default comparison behavior + + def run_migrations_offline() -> None: """ Run migrations in 'offline' mode. @@ -59,6 +174,7 @@ def run_migrations_offline() -> None: target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, + compare_type=custom_compare_types, ) with context.begin_transaction(): @@ -84,12 +200,20 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, + target_metadata=target_metadata, + compare_type=custom_compare_types, + ) with context.begin_transaction(): context.run_migrations() else: - context.configure(connection=connectable, target_metadata=target_metadata) + context.configure( + connection=connectable, + target_metadata=target_metadata, + compare_type=custom_compare_types, + ) with context.begin_transaction(): context.run_migrations() From 6a697a06df901e3508d080ed7fd138c4f3dd8c70 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:31:34 -0500 Subject: [PATCH 41/49] Enable floating point seconds with mysql --- mlos_bench/mlos_bench/storage/sql/schema.py | 55 ++++++++++++++++++--- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index bb97d61049..e9b369259b 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -39,6 +39,7 @@ create_mock_engine, inspect, ) +from sqlalchemy.dialects import mysql from sqlalchemy.engine import Engine from mlos_bench.util import path_join @@ -108,8 +109,20 @@ def __init__(self, engine: Engine): Column("git_repo", String(1024), nullable=False), Column("git_commit", String(40), nullable=False), # For backwards compatibility, we allow NULL for ts_start. - Column("ts_start", DateTime(timezone=True)), - Column("ts_end", DateTime(timezone=True)), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: # For backwards compatibility, we allow NULL for status. Column("status", String(self._status_len)), @@ -183,8 +196,22 @@ def __init__(self, engine: Engine): Column("trial_id", Integer, nullable=False), Column("config_id", Integer, nullable=False), Column("trial_runner_id", Integer, nullable=True, default=None), - Column("ts_start", DateTime(timezone=True), nullable=False), - Column("ts_end", DateTime(timezone=True), nullable=True), + Column( + "ts_start", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + ), + Column( + "ts_end", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=True, + ), # Should match the text IDs of `mlos_bench.environments.Status` enum: Column("status", String(self._status_len), nullable=False), PrimaryKeyConstraint("exp_id", "trial_id"), @@ -236,7 +263,15 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("status", String(self._status_len), nullable=False), UniqueConstraint("exp_id", "trial_id", "ts"), ForeignKeyConstraint( @@ -271,7 +306,15 @@ def __init__(self, engine: Engine): self._meta, Column("exp_id", String(self._exp_id_len), nullable=False), Column("trial_id", Integer, nullable=False), - Column("ts", DateTime(timezone=True), nullable=False, default="now"), + Column( + "ts", + DateTime(timezone=True).with_variant( + mysql.DATETIME(fsp=6), + "mysql", + ), + nullable=False, + default="now", + ), Column("metric_id", String(self._metric_id_len), nullable=False), Column("metric_value", String(self._metric_value_len)), UniqueConstraint("exp_id", "trial_id", "ts", "metric_id"), From f7ccf2621835bd9a62e7a04eef3319cb0e526955 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:33:36 -0500 Subject: [PATCH 42/49] Alembic script to add floating point seconds precision --- ...4_support_fractional_seconds_with_mysql.py | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py new file mode 100644 index 0000000000..c7a12d1b19 --- /dev/null +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py @@ -0,0 +1,120 @@ +# +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +"""Support fractional seconds with MySQL + +Revision ID: b61aa446e724 +Revises: 8928a401115b +Create Date: 2025-06-02 17:56:34.746642+00:00 + +""" +# pylint: disable=no-member + +from collections.abc import Sequence + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +# revision identifiers, used by Alembic. +revision: str = "b61aa446e724" +down_revision: str | None = "8928a401115b" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """The schema upgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "experiment", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial_telemetry", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """The schema downgrade script for this revision.""" + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "trial_telemetry", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + # ### end Alembic commands ### From 40374c2f25b91f8d9b7633c18e84f72741a7bc6e Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:34:20 -0500 Subject: [PATCH 43/49] fixup --- mlos_bench/mlos_bench/storage/sql/alembic.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic.ini b/mlos_bench/mlos_bench/storage/sql/alembic.ini index 6cc1e4b7c8..857375445e 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic.ini +++ b/mlos_bench/mlos_bench/storage/sql/alembic.ini @@ -65,7 +65,7 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # See README.md for details. # Uncomment one of these: sqlalchemy.url = sqlite:///mlos_bench.sqlite -#sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mlos_bench +#sqlalchemy.url = mysql+mysqlconnector://root:password@localhost:3306/mlos_bench #sqlalchemy.url = postgresql+psycopg2://root:password@localhost:5432/mlos_bench From fe4171a1b47f2da26d144d28f58fc0731537c673 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 13:39:59 -0500 Subject: [PATCH 44/49] rework to only apply to mysql --- ...4_support_fractional_seconds_with_mysql.py | 182 +++++++++--------- 1 file changed, 93 insertions(+), 89 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py index c7a12d1b19..1b933d916f 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py @@ -13,7 +13,7 @@ from collections.abc import Sequence -from alembic import op +from alembic import context, op import sqlalchemy as sa from sqlalchemy.dialects import mysql @@ -26,95 +26,99 @@ def upgrade() -> None: """The schema upgrade script for this revision.""" - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "experiment", - "ts_start", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=True, - ) - op.alter_column( - "experiment", - "ts_end", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=True, - ) - op.alter_column( - "trial", - "ts_start", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=False, - ) - op.alter_column( - "trial", - "ts_end", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=True, - ) - op.alter_column( - "trial_status", - "ts", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=False, - ) - op.alter_column( - "trial_telemetry", - "ts", - existing_type=mysql.DATETIME(), - type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - existing_nullable=False, - ) - # ### end Alembic commands ### + bind = context.get_bind() + if bind.dialect.name == "mysql": + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "experiment", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=True, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + op.alter_column( + "trial_telemetry", + "ts", + existing_type=mysql.DATETIME(), + type_=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + existing_nullable=False, + ) + # ### end Alembic commands ### def downgrade() -> None: """The schema downgrade script for this revision.""" - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "trial_telemetry", - "ts", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=False, - ) - op.alter_column( - "trial_status", - "ts", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=False, - ) - op.alter_column( - "trial", - "ts_end", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=True, - ) - op.alter_column( - "trial", - "ts_start", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=False, - ) - op.alter_column( - "experiment", - "ts_end", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=True, - ) - op.alter_column( - "experiment", - "ts_start", - existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), - type_=mysql.DATETIME(), - existing_nullable=True, - ) - # ### end Alembic commands ### + bind = context.get_bind() + if bind.dialect.name == "mysql": + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "trial_telemetry", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial_status", + "ts", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "trial", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "trial", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=False, + ) + op.alter_column( + "experiment", + "ts_end", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + op.alter_column( + "experiment", + "ts_start", + existing_type=sa.DateTime(timezone=True).with_variant(mysql.DATETIME(fsp=6), "mysql"), + type_=mysql.DATETIME(), + existing_nullable=True, + ) + # ### end Alembic commands ### From 5a42a141e9ca1d7455f6387f32bb60e4a966a93b Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 15:55:34 -0500 Subject: [PATCH 45/49] be sure to mark that version as required --- .../mlos_bench/tests/storage/sql/fixtures.py | 13 +++++++++++-- .../tests/storage/sql/test_storage_schemas.py | 17 +++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 572b1a4c0d..1c3ab4614e 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -23,7 +23,7 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED, wait_docker_service_healthy +from mlos_bench.tests import SEED, wait_docker_service_healthy, DOCKER from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, @@ -39,7 +39,16 @@ # pylint: disable=redefined-outer-name -# TODO: Add mysql_storage and postgres_storage +DOCKER_DBMS_FIXTURES = [] +if DOCKER: + DOCKER_DBMS_FIXTURES = [ + lazy_fixture("mysql_storage"), + lazy_fixture("postgres_storage"), + ] + +PERSISTENT_SQL_STORAGE_FIXTURES = [lazy_fixture("sqlite_storage")] +if DOCKER: + PERSISTENT_SQL_STORAGE_FIXTURES.extend(DOCKER_DBMS_FIXTURES) @pytest.fixture(scope="session") diff --git a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py index 0206bb0dd7..bf7c4dabee 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/test_storage_schemas.py @@ -10,28 +10,21 @@ from sqlalchemy import inspect from mlos_bench.storage.sql.storage import SqlStorage - -from mlos_bench.tests import DOCKER +from mlos_bench.tests.storage.sql.fixtures import DOCKER_DBMS_FIXTURES # NOTE: This value is hardcoded to the latest revision in the alembic versions directory. # It could also be obtained programmatically using the "alembic heads" command or heads() API. # See Also: schema.py for an example of programmatic alembic config access. -CURRENT_ALEMBIC_HEAD = "8928a401115b" - -# Try to test multiple DBMS engines. -docker_dbms_fixtures = [] -if DOCKER: - docker_dbms_fixtures = [ - lazy_fixture("mysql_storage"), - lazy_fixture("postgres_storage"), - ] +CURRENT_ALEMBIC_HEAD = "b61aa446e724" +# Try to test multiple DBMS engines. @pytest.mark.parametrize( "some_sql_storage_fixture", [ + lazy_fixture("mem_storage"), lazy_fixture("sqlite_storage"), - *docker_dbms_fixtures, + *DOCKER_DBMS_FIXTURES, ], ) def test_storage_schemas(some_sql_storage_fixture: SqlStorage) -> None: From f273ea3cec7c5d68a4d2d145dd24de8f88dc11c2 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 15:56:41 -0500 Subject: [PATCH 46/49] add that refactor too --- .../tests/storage/test_storage_pickling.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index 365c68af93..f4cc1793b9 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -9,21 +9,13 @@ from typing import Literal import pytest -from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from pytz import UTC from mlos_bench.environments.status import Status from mlos_bench.storage.base_storage import Storage -from mlos_bench.tests import DOCKER +from mlos_bench.tests.storage.sql.fixtures import PERSISTENT_SQL_STORAGE_FIXTURES from mlos_bench.tunables.tunable_groups import TunableGroups -docker_dbms_fixtures = [] -if DOCKER: - docker_dbms_fixtures = [ - lazy_fixture("mysql_storage"), - lazy_fixture("postgres_storage"), - ] - # TODO: When we introduce ParallelTrialScheduler warn at config startup time # that it is incompatible with sqlite storage on Windows. @@ -35,8 +27,7 @@ "persistent_storage", [ # TODO: Improve this test to support non-sql backends eventually as well. - lazy_fixture("sqlite_storage"), - *docker_dbms_fixtures, + *PERSISTENT_SQL_STORAGE_FIXTURES, ], ) def test_storage_pickle_restore_experiment_and_trial( From b1b3733eec90f9ec4ceeef2c52b0333dda940f9a Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 16:01:16 -0500 Subject: [PATCH 47/49] Fixups and refactors to allow two things 1. Allow restoring an Experiment by ID for parallel workers without needing to re-evaluate the git info. 2. Allow recovering the absolute root env rather than mistakenly re-resolving it from the root of the repo multiple times (which results in an error). --- mlos_bench/mlos_bench/storage/base_storage.py | 74 +++++++++++++++++-- .../mlos_bench/storage/sql/experiment.py | 10 ++- mlos_bench/mlos_bench/storage/sql/storage.py | 6 +- mlos_bench/mlos_bench/tests/__init__.py | 1 - .../mlos_bench/tests/storage/exp_data_test.py | 2 +- .../mlos_bench/tests/storage/sql/fixtures.py | 4 +- .../tests/storage/test_storage_pickling.py | 2 +- mlos_bench/mlos_bench/tests/util_git_test.py | 20 ++++- mlos_bench/mlos_bench/util.py | 39 ++++++++-- 9 files changed, 134 insertions(+), 24 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 75a84bf0b2..4a7bc65063 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -25,10 +25,12 @@ from __future__ import annotations import logging +import os from abc import ABCMeta, abstractmethod from collections.abc import Iterator, Mapping from contextlib import AbstractContextManager as ContextManager from datetime import datetime +from subprocess import CalledProcessError from types import TracebackType from typing import Any, Literal @@ -38,7 +40,7 @@ from mlos_bench.services.base_service import Service from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.tunables.tunable_groups import TunableGroups -from mlos_bench.util import get_git_info +from mlos_bench.util import get_git_info, get_git_root, path_join _LOG = logging.getLogger(__name__) @@ -187,16 +189,61 @@ def __init__( # pylint: disable=too-many-arguments tunables: TunableGroups, experiment_id: str, trial_id: int, - root_env_config: str, + root_env_config: str | None, description: str, opt_targets: dict[str, Literal["min", "max"]], + git_repo: str | None = None, + git_commit: str | None = None, + rel_root_env_config: str | None = None, ): self._tunables = tunables.copy() self._trial_id = trial_id self._experiment_id = experiment_id - (self._git_repo, self._git_commit, self._root_env_config) = get_git_info( - root_env_config - ) + if root_env_config is None: + # Restoring from DB. + if not (git_repo and git_commit and rel_root_env_config): + raise ValueError( + "Missing required args: git_repo, git_commit, rel_root_env_config" + ) + self._git_repo = git_repo + self._git_commit = git_commit + self._rel_root_env_config = rel_root_env_config + + # Currently we only store the relative path of the root env config + # from the git repo it came from. + git_root = git_repo + if not os.path.exists(git_root): + try: + git_root = get_git_root(os.curdir) + except CalledProcessError: + _LOG.warning( + "Failed to find a git repo in the current working directory: %s", + os.curdir, + ) + git_root = get_git_root(__file__) + + self._abs_root_env_config = path_join( + git_root, + self._rel_root_env_config, + abs_path=True, + ) + _LOG.info( + "Resolved relative root_config %s for experiment %s to %s", + self._rel_root_env_config, + self._experiment_id, + self._abs_root_env_config, + ) + else: + if git_repo or git_commit or rel_root_env_config: + raise ValueError("Unexpected args: git_repo, git_commit, rel_root_env_config") + ( + self._git_repo, + self._git_commit, + self._rel_root_env_config, + self._abs_root_env_config, + ) = get_git_info( + root_env_config, + ) self._description = description self._opt_targets = opt_targets self._in_context = False @@ -278,9 +325,20 @@ def description(self) -> str: return self._description @property - def root_env_config(self) -> str: - """Get the Experiment's root Environment config file path.""" - return self._root_env_config + def rel_root_env_config(self) -> str: + """Get the Experiment's root Environment config's relative file path + to the git repo root. + """ + return self._rel_root_env_config + + @property + def abs_root_env_config(self) -> str: + """Get the Experiment's root Environment config file path. + + This returns the current absolute path to the root config for this + process instead of the path relative to the git repo root. + """ + return self._abs_root_env_config @property def tunables(self) -> TunableGroups: diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 7a6dbf75b4..6b7aa220a6 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -38,9 +38,12 @@ def __init__( # pylint: disable=too-many-arguments tunables: TunableGroups, experiment_id: str, trial_id: int, - root_env_config: str, + root_env_config: str | None, description: str, opt_targets: dict[str, Literal["min", "max"]], + git_repo: str | None = None, + git_commit: str | None = None, + rel_root_env_config: str | None = None, ): super().__init__( tunables=tunables, @@ -49,6 +52,9 @@ def __init__( # pylint: disable=too-many-arguments root_env_config=root_env_config, description=description, opt_targets=opt_targets, + git_repo=git_repo, + git_commit=git_commit, + rel_root_env_config=rel_root_env_config, ) self._engine = engine self._schema = schema @@ -89,7 +95,7 @@ def _setup(self) -> None: description=self._description, git_repo=self._git_repo, git_commit=self._git_commit, - root_env_config=self._root_env_config, + root_env_config=self._rel_root_env_config, ) ) conn.execute( diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 7fc86aff1f..27748e5ca8 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -139,9 +139,13 @@ def get_experiment_by_id( experiment_id=exp.exp_id, trial_id=-1, # will be loaded upon __enter__ which calls _setup() description=exp.description, - root_env_config=exp.root_env_config, + # Use special logic to load the experiment root config info directly. + root_env_config=None, tunables=tunables, opt_targets=opt_targets, + git_repo=exp.git_repo, + git_commit=exp.git_commit, + rel_root_env_config=exp.root_env_config, ) def experiment( # pylint: disable=too-many-arguments diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 1e0d8c9941..41fbd0bc37 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -20,7 +20,6 @@ import pytest import pytz -from pytest_docker.plugin import Services as DockerServices from mlos_bench.util import get_class_from_name, nullable diff --git a/mlos_bench/mlos_bench/tests/storage/exp_data_test.py b/mlos_bench/mlos_bench/tests/storage/exp_data_test.py index dc8baf489c..b8ca6a64b9 100644 --- a/mlos_bench/mlos_bench/tests/storage/exp_data_test.py +++ b/mlos_bench/mlos_bench/tests/storage/exp_data_test.py @@ -30,7 +30,7 @@ def test_exp_data_root_env_config( """Tests the root_env_config property of ExperimentData.""" # pylint: disable=protected-access assert exp_data.root_env_config == ( - exp_storage._root_env_config, + exp_storage._rel_root_env_config, exp_storage._git_repo, exp_storage._git_commit, ) diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 1c3ab4614e..bcb74ad375 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -238,7 +238,7 @@ def exp_storage( with storage.experiment( experiment_id="Test-001", trial_id=1, - root_env_config="environment.jsonc", + root_env_config="my-environment.jsonc", description="pytest experiment", tunables=tunable_groups, opt_targets={"score": "min"}, @@ -372,7 +372,7 @@ def _dummy_run_exp( trial_runners=trial_runners, optimizer=opt, storage=storage, - root_env_config=exp.root_env_config, + root_env_config=exp.abs_root_env_config, ) # Add some trial data to that experiment by "running" it. diff --git a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py index f4cc1793b9..21c204283c 100644 --- a/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py +++ b/mlos_bench/mlos_bench/tests/storage/test_storage_pickling.py @@ -72,7 +72,7 @@ def test_storage_pickle_restore_experiment_and_trial( assert restored_experiment is not experiment assert restored_experiment.experiment_id == experiment.experiment_id assert restored_experiment.description == experiment.description - assert restored_experiment.root_env_config == experiment.root_env_config + assert restored_experiment.rel_root_env_config == experiment.rel_root_env_config assert restored_experiment.tunables == experiment.tunables assert restored_experiment.opt_targets == experiment.opt_targets with restored_experiment: diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index 77fd2779c7..bbac89399d 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -4,13 +4,27 @@ # """Unit tests for get_git_info utility function.""" import re +import os -from mlos_bench.util import get_git_info +from mlos_bench.util import get_git_info, path_join def test_get_git_info() -> None: - """Check that we can retrieve git info about the current repository correctly.""" - (git_repo, git_commit, rel_path) = get_git_info(__file__) + """Check that we can retrieve git info about the current repository + correctly from a file.""" + (git_repo, git_commit, rel_path, abs_path) = get_git_info(__file__) assert "mlos" in git_repo.lower() assert re.match(r"[0-9a-f]{40}", git_commit) is not None assert rel_path == "mlos_bench/mlos_bench/tests/util_git_test.py" + assert abs_path == path_join(__file__, abs_path=True) + + +def test_get_git_info_dir() -> None: + """Check that we can retrieve git info about the current repository + correctly from a directory.""" + dirname = os.path.dirname(__file__) + (git_repo, git_commit, rel_path, abs_path) = get_git_info(dirname) + assert "mlos" in git_repo.lower() + assert re.match(r"[0-9a-f]{40}", git_commit) is not None + assert rel_path == "mlos_bench/mlos_bench/tests" + assert abs_path == path_join(dirname, abs_path=True) diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 1c45cd4ecf..0836f5f048 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -274,7 +274,32 @@ def check_required_params(config: Mapping[str, Any], required_params: Iterable[s ) -def get_git_info(path: str = __file__) -> tuple[str, str, str]: +def get_git_root(path: str = __file__) -> str: + """ + Get the root dir of the git repository. + + Parameters + ---------- + path : str, optional + Path to the file in git repository. + + Returns + ------- + str + _description_ + """ + abspath = path_join(path, abs_path=True) + if not os.path.exists(abspath) or not os.path.isdir(abspath): + dirname = os.path.dirname(abspath) + else: + dirname = abspath + git_root = subprocess.check_output( + ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True + ).strip() + return git_root + + +def get_git_info(path: str = __file__) -> tuple[str, str, str, str]: """ Get the git repository, commit hash, and local path of the given file. @@ -286,9 +311,13 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str]: Returns ------- (git_repo, git_commit, git_path) : tuple[str, str, str] - Git repository URL, last commit hash, and relative file path. + Git repository URL, last commit hash, and relative file path and current absolute path. """ - dirname = os.path.dirname(path) + abspath = path_join(path, abs_path=True) + if not os.path.exists(abspath) or not os.path.isdir(abspath): + dirname = os.path.dirname(abspath) + else: + dirname = abspath git_repo = subprocess.check_output( ["git", "-C", dirname, "remote", "get-url", "origin"], text=True ).strip() @@ -299,8 +328,8 @@ def get_git_info(path: str = __file__) -> tuple[str, str, str]: ["git", "-C", dirname, "rev-parse", "--show-toplevel"], text=True ).strip() _LOG.debug("Current git branch: %s %s", git_repo, git_commit) - rel_path = os.path.relpath(os.path.abspath(path), os.path.abspath(git_root)) - return (git_repo, git_commit, rel_path.replace("\\", "/")) + rel_path = os.path.relpath(abspath, os.path.abspath(git_root)) + return (git_repo, git_commit, rel_path.replace("\\", "/"), abspath) # Note: to avoid circular imports, we don't specify TunableValue here. From c7146e05269ee858b95b3236c8d59b9a029cd2b9 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Mon, 2 Jun 2025 16:03:54 -0500 Subject: [PATCH 48/49] Extend storage tests to check multiple backends by default --- mlos_bench/mlos_bench/tests/storage/conftest.py | 1 + .../mlos_bench/tests/storage/sql/fixtures.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/mlos_bench/mlos_bench/tests/storage/conftest.py b/mlos_bench/mlos_bench/tests/storage/conftest.py index acdbe5024a..290319c884 100644 --- a/mlos_bench/mlos_bench/tests/storage/conftest.py +++ b/mlos_bench/mlos_bench/tests/storage/conftest.py @@ -16,6 +16,7 @@ postgres_storage_info = sql_storage_fixtures.postgres_storage_info postgres_storage = sql_storage_fixtures.postgres_storage sqlite_storage = sql_storage_fixtures.sqlite_storage +mem_storage = sql_storage_fixtures.mem_storage storage = sql_storage_fixtures.storage exp_storage = sql_storage_fixtures.exp_storage exp_no_tunables_storage = sql_storage_fixtures.exp_no_tunables_storage diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index bcb74ad375..4c1f4962b9 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -13,8 +13,10 @@ from random import seed as rand_seed import pytest +from pytest import FixtureRequest from fasteners import InterProcessLock from pytest_docker.plugin import Services as DockerServices +from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture from mlos_bench.optimizers.mock_optimizer import MockOptimizer from mlos_bench.schedulers.sync_scheduler import SyncScheduler @@ -213,7 +215,7 @@ def sqlite_storage() -> Generator[SqlStorage]: @pytest.fixture -def storage() -> SqlStorage: +def mem_storage() -> SqlStorage: """Test fixture for in-memory SQLite3 storage.""" return SqlStorage( service=None, @@ -225,6 +227,19 @@ def storage() -> SqlStorage: ) +@pytest.fixture( + params=[ + lazy_fixture("mem_storage"), + *DOCKER_DBMS_FIXTURES, + ] +) +def storage(request: FixtureRequest) -> SqlStorage: + """Returns a SqlStorage fixture, either in memory, or a dockerized DBMS.""" + sql_storage = request.param + assert isinstance(sql_storage, SqlStorage) + return sql_storage + + @pytest.fixture def exp_storage( storage: SqlStorage, From faeb0e31f3ca1acb5faaf94aa103c9e80d9c644b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Jun 2025 21:18:12 +0000 Subject: [PATCH 49/49] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mlos_bench/mlos_bench/storage/base_storage.py | 11 ++++++----- mlos_bench/mlos_bench/storage/sql/alembic/env.py | 7 ++++--- ...a446e724_support_fractional_seconds_with_mysql.py | 6 +++--- mlos_bench/mlos_bench/tests/__init__.py | 3 ++- mlos_bench/mlos_bench/tests/storage/sql/fixtures.py | 4 ++-- mlos_bench/mlos_bench/tests/util_git_test.py | 12 +++++++----- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 4a7bc65063..aec5e4d244 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -326,17 +326,18 @@ def description(self) -> str: @property def rel_root_env_config(self) -> str: - """Get the Experiment's root Environment config's relative file path - to the git repo root. + """Get the Experiment's root Environment config's relative file path to the + git repo root. """ return self._rel_root_env_config @property def abs_root_env_config(self) -> str: - """Get the Experiment's root Environment config file path. + """ + Get the Experiment's root Environment config file path. - This returns the current absolute path to the root config for this - process instead of the path relative to the git repo root. + This returns the current absolute path to the root config for this process + instead of the path relative to the git repo root. """ return self._abs_root_env_config diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/env.py b/mlos_bench/mlos_bench/storage/sql/alembic/env.py index c56e9cebf3..af1a0db720 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/env.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/env.py @@ -5,17 +5,17 @@ """Alembic environment script.""" # pylint: disable=no-member +import logging import sys from logging.config import fileConfig -import logging from alembic import context from alembic.migration import MigrationContext from sqlalchemy import create_engine, engine_from_config, pool +from sqlalchemy.dialects import mysql from sqlalchemy.schema import Column as SchemaColumn from sqlalchemy.sql.schema import Column from sqlalchemy.types import TypeEngine -from sqlalchemy.dialects import mysql from mlos_bench.storage.sql.schema import DbSchema @@ -60,7 +60,8 @@ def custom_compare_types( inspected_type: TypeEngine, metadata_type: TypeEngine, ) -> bool | None: - """Custom column type comparator. + """ + Custom column type comparator. See `Comparing Types `_ diff --git a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py index 1b933d916f..0d0f3f9b3d 100644 --- a/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py +++ b/mlos_bench/mlos_bench/storage/sql/alembic/versions/b61aa446e724_support_fractional_seconds_with_mysql.py @@ -2,19 +2,19 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Support fractional seconds with MySQL +""" +Support fractional seconds with MySQL. Revision ID: b61aa446e724 Revises: 8928a401115b Create Date: 2025-06-02 17:56:34.746642+00:00 - """ # pylint: disable=no-member from collections.abc import Sequence -from alembic import context, op import sqlalchemy as sa +from alembic import context, op from sqlalchemy.dialects import mysql # revision identifiers, used by Alembic. diff --git a/mlos_bench/mlos_bench/tests/__init__.py b/mlos_bench/mlos_bench/tests/__init__.py index 41fbd0bc37..a367133701 100644 --- a/mlos_bench/mlos_bench/tests/__init__.py +++ b/mlos_bench/mlos_bench/tests/__init__.py @@ -16,10 +16,10 @@ from datetime import tzinfo from logging import debug, warning from subprocess import run -from pytest_docker.plugin import Services as DockerServices import pytest import pytz +from pytest_docker.plugin import Services as DockerServices from mlos_bench.util import get_class_from_name, nullable @@ -89,6 +89,7 @@ def check_class_name(obj: object, expected_class_name: str) -> bool: full_class_name = obj.__class__.__module__ + "." + obj.__class__.__name__ return full_class_name == try_resolve_class_name(expected_class_name) + HOST_DOCKER_NAME = "host.docker.internal" diff --git a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py index 4c1f4962b9..9f5e203ad6 100644 --- a/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py +++ b/mlos_bench/mlos_bench/tests/storage/sql/fixtures.py @@ -13,8 +13,8 @@ from random import seed as rand_seed import pytest -from pytest import FixtureRequest from fasteners import InterProcessLock +from pytest import FixtureRequest from pytest_docker.plugin import Services as DockerServices from pytest_lazy_fixtures.lazy_fixture import lf as lazy_fixture @@ -25,7 +25,7 @@ from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.sql.storage import SqlStorage from mlos_bench.storage.storage_factory import from_config -from mlos_bench.tests import SEED, wait_docker_service_healthy, DOCKER +from mlos_bench.tests import DOCKER, SEED, wait_docker_service_healthy from mlos_bench.tests.storage import ( CONFIG_TRIAL_REPEAT_COUNT, MAX_TRIALS, diff --git a/mlos_bench/mlos_bench/tests/util_git_test.py b/mlos_bench/mlos_bench/tests/util_git_test.py index bbac89399d..788c06a5ea 100644 --- a/mlos_bench/mlos_bench/tests/util_git_test.py +++ b/mlos_bench/mlos_bench/tests/util_git_test.py @@ -3,15 +3,16 @@ # Licensed under the MIT License. # """Unit tests for get_git_info utility function.""" -import re import os +import re from mlos_bench.util import get_git_info, path_join def test_get_git_info() -> None: - """Check that we can retrieve git info about the current repository - correctly from a file.""" + """Check that we can retrieve git info about the current repository correctly from a + file. + """ (git_repo, git_commit, rel_path, abs_path) = get_git_info(__file__) assert "mlos" in git_repo.lower() assert re.match(r"[0-9a-f]{40}", git_commit) is not None @@ -20,8 +21,9 @@ def test_get_git_info() -> None: def test_get_git_info_dir() -> None: - """Check that we can retrieve git info about the current repository - correctly from a directory.""" + """Check that we can retrieve git info about the current repository correctly from a + directory. + """ dirname = os.path.dirname(__file__) (git_repo, git_commit, rel_path, abs_path) = get_git_info(dirname) assert "mlos" in git_repo.lower()