aiondemand · fkiraly · Mar 6, 2026 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/.flake8 b/.flake8
diff --git a/.github/workflows/pytest-tests.yml b/.github/workflows/pytest-tests.yml
@@ -36,10 +36,25 @@ jobs:
         python -m pip install ".[dev]"
         pre-commit install
 
-    - name: pre-commit check
+    - name: Get changed files
+      id: changed-files
       run: |
-        source venv/bin/activate
-        pre-commit run --all
+        CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | tr '\n' ' ')
+        echo "CHANGED_FILES=${CHANGED_FILES}" >> $GITHUB_ENV
+
+    - name: Print changed files
+      run: |
+        echo "Changed files:" && echo "$CHANGED_FILES" | tr ' ' '\n'
+
+    - name: Run pre-commit on changed files
+      run: |
+        # Every step starts a fresh shell, so the venv has to be re-activated for pre-commit.
+        source venv/bin/activate
+        if [ -n "$CHANGED_FILES" ]; then
+          pre-commit run --color always --files $CHANGED_FILES --show-diff-on-failure
+        else
+          echo "No changed files to check."
+        fi
 
     - name: Run pytest
       run: |

diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,6 @@ dmypy.json
 .pyre/
 
 .vscode
+
+# Ruff cache
+.ruff_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0  # Use the ref you want to point at
+    rev: v6.0.0  # Use the ref you want to point at
     hooks:
       - id: no-commit-to-branch
       - id: check-added-large-files
@@ -14,33 +14,16 @@ repos:
       - id: check-json
       - id: check-toml
       - id: check-yaml
+      - id: mixed-line-ending
+        args: ['--fix=lf']
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.4.1
     hooks:
       - id: codespell
         files: "(docs|.github/ISSUE_TEMPLATE)/.*"
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.7
+    rev: v0.15.2
     hooks:
       - id: ruff
         args: [ --fix ]
       - id: ruff-format
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.991
-    hooks:
-      - id: mypy
-        name: mypy src
-        files: src/.*
-        additional_dependencies:
-          - types-python-dateutil
-          - types-pytz
-          - types-requests
-          - types-setuptools
-  - repo: local
-    hooks:
-      - id: pytest-check
-        name: pytest-check
-        entry: pytest src/tests --versions 'latest'
-        language: system
-        pass_filenames: false
-        exclude: ".*.md"
diff --git a/alembic/alembic/versions/1662d64ebe23_make_draft_status_enum.py b/alembic/alembic/versions/1662d64ebe23_make_draft_status_enum.py
@@ -9,7 +9,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy import Column, INT, String, Enum
 
 from database.model.field_length import NORMAL

diff --git a/alembic/alembic/versions/19f12fe539c7_extend_url.py b/alembic/alembic/versions/19f12fe539c7_extend_url.py
@@ -10,7 +10,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy import String
 
 logger = logging.getLogger("alembic")

diff --git a/alembic/alembic/versions/1d53330411fa_project_funding_link_and_subtitle.py b/alembic/alembic/versions/1d53330411fa_project_funding_link_and_subtitle.py
@@ -9,7 +9,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy import Column, String
 
 # revision identifiers, used by Alembic.

diff --git a/alembic/alembic/versions/1fd9b6a162c4_rename_project_total_cost_euro.py b/alembic/alembic/versions/1fd9b6a162c4_rename_project_total_cost_euro.py
@@ -9,7 +9,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy.dialects.mysql import DECIMAL
 
 # revision identifiers, used by Alembic.

diff --git a/alembic/alembic/versions/42f747800456_knowledge_asset_identifiers.py b/alembic/alembic/versions/42f747800456_knowledge_asset_identifiers.py
@@ -43,26 +43,26 @@ def upgrade() -> None:
         logger.info("Fetching existing foreign key constraints.")
         constraints = session.execute(
             text(
-                "SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME "
+                "SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME "  # noqa: E501
                 "FROM information_schema.REFERENTIAL_CONSTRAINTS as refs "
                 "JOIN information_schema.KEY_COLUMN_USAGE as kcu "
                 "ON refs.CONSTRAINT_NAME=kcu.CONSTRAINT_NAME "
-                f"WHERE refs.REFERENCED_TABLE_NAME='knowledge_asset';"
+                "WHERE refs.REFERENCED_TABLE_NAME='knowledge_asset';"
             )
         )
     constraints = list(constraints)
     logger.info(f"Dropping {len(constraints)} foreign key constraints.")
-    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
+    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:  # noqa: B007
         op.execute(f"ALTER TABLE {from_table} DROP FOREIGN KEY {constraint}")
 
     # Without the foreign key constraints in place, we can update the columns.
     updated_columns = set()
-    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
+    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:  # noqa: B007
         for table, column in [(to_table, to_column), (from_table, from_column)]:
             if (table, column) not in updated_columns:
                 logger.info(f"Altering {table}.{column} to VARCHAR(30) COLLATE utf8_bin.")
                 op.execute(
-                    f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;"
+                    f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;"  # noqa: E501
                 )
                 updated_columns.add((table, column))
 

diff --git a/alembic/alembic/versions/459323683348_synchronize_identifiers.py b/alembic/alembic/versions/459323683348_synchronize_identifiers.py
@@ -113,25 +113,25 @@ def upgrade() -> None:
         logger.info("Fetching existing foreign key constraints.")
         constraints = session.execute(
             text(
-                "SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME "
+                "SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME "  # noqa: E501
                 "FROM information_schema.REFERENTIAL_CONSTRAINTS as refs "
                 "JOIN information_schema.KEY_COLUMN_USAGE as kcu "
                 "ON refs.CONSTRAINT_NAME=kcu.CONSTRAINT_NAME "
-                f"WHERE refs.REFERENCED_TABLE_NAME IN ({', '.join(map(repr, tables_with_referenced_key))});"
+                f"WHERE refs.REFERENCED_TABLE_NAME IN ({', '.join(map(repr, tables_with_referenced_key))});"  # noqa: E501
             )
         )
     constraints = list(constraints)
     logger.info(f"Dropping {len(constraints)} foreign key constraints.")
-    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
+    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:  # noqa: B007
         op.execute(f"ALTER TABLE {from_table} DROP FOREIGN KEY {constraint}")
 
     updated_columns = set()
-    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
+    for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:  # noqa: B007
         for table, column in [(to_table, to_column), (from_table, from_column)]:
             if (table, column) not in updated_columns:
                 logger.info(f"Altering {table}.{column} to VARCHAR(30) COLLATE utf8_bin.")
                 op.execute(
-                    f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;"
+                    f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;"  # noqa: E501
                 )
                 updated_columns.add((table, column))
 

diff --git a/alembic/alembic/versions/79b2dda7e3be_case_sensitive_alternate_name.py b/alembic/alembic/versions/79b2dda7e3be_case_sensitive_alternate_name.py
@@ -9,7 +9,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy import String
 
 # revision identifiers, used by Alembic.

diff --git a/alembic/alembic/versions/8b054cdc9261_create_map_tables.py b/alembic/alembic/versions/8b054cdc9261_create_map_tables.py
@@ -129,7 +129,7 @@ def upgrade() -> None:
                 END WHILE;
                 RETURN result;
             END;
-            """
+            """  # noqa: E501
         )
     )
     # We store a map for the old->new identifiers so we can support backwards compatibility (maybe)
@@ -145,7 +145,7 @@ def upgrade() -> None:
             Column("new", String(30), index=True),
         )
         op.execute(
-            f"INSERT INTO {map_table} SELECT identifier, CONCAT('{abbreviations[child]}', '_', rand_id()) FROM {child} "
+            f"INSERT INTO {map_table} SELECT identifier, CONCAT('{abbreviations[child]}', '_', rand_id()) FROM {child} "  # noqa: E501
         )
 
     for parent in [ai_resource, ai_asset, agent]:
@@ -157,7 +157,7 @@ def upgrade() -> None:
             Column("new", String(30), index=True),
         )
         child_data = "UNION ".join(
-            f"SELECT child.{parent.fk_identifier} as parent_identifier, child_map_table.new as new_identifier "
+            f"SELECT child.{parent.fk_identifier} as parent_identifier, child_map_table.new as new_identifier "  # noqa: E501
             f"FROM {child_table} as child "
             f"JOIN _{child_table}_identifier_map as child_map_table "
             f"ON child_map_table.old=child.identifier "

diff --git a/alembic/alembic/versions/8f9ac801a283_cascade_delete_ai_resource.py b/alembic/alembic/versions/8f9ac801a283_cascade_delete_ai_resource.py
@@ -9,7 +9,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 
 
 # revision identifiers, used by Alembic.

diff --git a/alembic/alembic/versions/95fa6a3c7eee_extend_registration_link.py b/alembic/alembic/versions/95fa6a3c7eee_extend_registration_link.py
@@ -10,7 +10,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy import String
 
 # revision identifiers, used by Alembic.
@@ -22,15 +21,15 @@
 
 def upgrade() -> None:
     op.alter_column(
-        f"event",
+        "event",
         "registration_link",
         type_=String(1800),
     )
 
 
 def downgrade() -> None:
     op.alter_column(
-        f"event",
+        "event",
         "registration_link",
         type_=String(256),
     )
diff --git a/alembic/alembic/versions/d09ed8ad4533_add_news_source.py b/alembic/alembic/versions/d09ed8ad4533_add_news_source.py
@@ -9,7 +9,6 @@
 from typing import Sequence, Union
 
 from alembic import op
-import sqlalchemy as sa
 from sqlalchemy import Column, String
 
 from database.model.field_length import LONG

diff --git a/alembic/alembic/versions/eb4e8cf555d9_convert_country_to_taxonomy.py b/alembic/alembic/versions/eb4e8cf555d9_convert_country_to_taxonomy.py
@@ -22,7 +22,7 @@
 def upgrade() -> None:
     # Migrate existing countries to country table as unofficial
     op.execute(
-        "insert into country(name, definition, official) SELECT distinct(country), '', false from address;"
+        "insert into country(name, definition, official) SELECT distinct(country), '', false from address;"  # noqa: E501
     )
     # Create new column that references the identifier
     op.add_column("address", Column("country_identifier", sa.Integer(), nullable=True))

diff --git a/pyproject.toml b/pyproject.toml
@@ -63,15 +63,12 @@ py-modules = []
 
 [tool.ruff]
 line-length = 100
-exclude = [
-    "src/tests",
-]
 
 [tool.codespell]
 ignore-words-list = "checkin"
 
 [tool.ruff.lint]
-select = ["S"]
+select = ["S", "B", "C", "E", "F", "T", "W"]
 
 [tool.pytest.ini_options]
 filterwarnings = [

diff --git a/scripts/migrate_hf.py b/scripts/migrate_hf.py
@@ -6,30 +6,27 @@
 so can be used to avoid indexing the same dataset twice under a different platform identifier.
 
 To be run once (around sometime Nov 2024), likely not needed after that. See also #385, 392.
-"""
+"""  # noqa: E501
 
 import logging
 import os
 import string
 from http import HTTPStatus
-import time
 from pathlib import Path
 
 from sqlalchemy import select
 from database.session import DbSession, EngineSingleton
 from database.model.dataset.dataset import Dataset
-from database.model.platform.platform import Platform
 from database.model.platform.platform_names import PlatformName
 from database.model.concept.concept import AIoDConcept
 
 # Magic import which triggers ORM setup
-import database.setup
+import database.setup  # noqa: F401
 
 import requests
 import json
 
 import re
-from http import HTTPStatus
 
 
 def fetch_huggingface_metadata() -> list[dict]:
@@ -86,9 +83,10 @@ def main():
         datasets = session.scalars(datasets_query).all()
 
     logging.info(f"Found {len(datasets)} huggingface datasets.")
-    is_old_style_identifier = lambda identifier: any(
-        char not in string.hexdigits for char in identifier
-    )
+
+    def is_old_style_identifier(identifier):
+        return any(char not in string.hexdigits for char in identifier)
+
     datasets = [
         dataset
         for dataset in datasets
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,6 @@ dmypy.json
 .pyre/
 
 .vscode
+
+# Ruff cache
+.ruff_cache/