Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .flake8

This file was deleted.

19 changes: 16 additions & 3 deletions .github/workflows/pytest-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,23 @@ jobs:
python -m pip install ".[dev]"
pre-commit install

- name: pre-commit check
- name: Get changed files
id: changed-files
run: |
source venv/bin/activate
pre-commit run --all
CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | tr '\n' ' ')
echo "CHANGED_FILES=${CHANGED_FILES}" >> $GITHUB_ENV

- name: Print changed files
run: |
echo "Changed files:" && echo "$CHANGED_FILES" | tr ' ' '\n'

- name: Run pre-commit on changed files
run: |
if [ -n "$CHANGED_FILES" ]; then
pre-commit run --color always --files $CHANGED_FILES --show-diff-on-failure
else
echo "No changed files to check."
fi

- name: Run pytest
run: |
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,6 @@ dmypy.json
.pyre/

.vscode

# Ruff cache
.ruff_cache/
27 changes: 5 additions & 22 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0 # Use the ref you want to point at
rev: v6.0.0 # Use the ref you want to point at
hooks:
- id: no-commit-to-branch
- id: check-added-large-files
Expand All @@ -14,33 +14,16 @@ repos:
- id: check-json
- id: check-toml
- id: check-yaml
- id: mixed-line-ending
args: ['--fix=lf']
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
rev: v2.4.1
hooks:
- id: codespell
files: "(docs|.github/ISSUE_TEMPLATE)/.*"
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.7
rev: v0.15.2
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.991
hooks:
- id: mypy
name: mypy src
files: src/.*
additional_dependencies:
- types-python-dateutil
- types-pytz
- types-requests
- types-setuptools
- repo: local
hooks:
- id: pytest-check
name: pytest-check
entry: pytest src/tests --versions 'latest'
language: system
pass_filenames: false
exclude: ".*.md"
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import Column, INT, String, Enum

from database.model.field_length import NORMAL
Expand Down
1 change: 0 additions & 1 deletion alembic/alembic/versions/19f12fe539c7_extend_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import String

logger = logging.getLogger("alembic")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import Column, String

# revision identifiers, used by Alembic.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.mysql import DECIMAL

# revision identifiers, used by Alembic.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,26 @@ def upgrade() -> None:
logger.info("Fetching existing foreign key constraints.")
constraints = session.execute(
text(
"SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME "
"SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME " # noqa: E501
"FROM information_schema.REFERENTIAL_CONSTRAINTS as refs "
"JOIN information_schema.KEY_COLUMN_USAGE as kcu "
"ON refs.CONSTRAINT_NAME=kcu.CONSTRAINT_NAME "
f"WHERE refs.REFERENCED_TABLE_NAME='knowledge_asset';"
"WHERE refs.REFERENCED_TABLE_NAME='knowledge_asset';"
)
)
constraints = list(constraints)
logger.info(f"Dropping {len(constraints)} foreign key constraints.")
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints: # noqa: B007
op.execute(f"ALTER TABLE {from_table} DROP FOREIGN KEY {constraint}")

# Without the foreign key constraints in place, we can update the columns.
updated_columns = set()
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints: # noqa: B007
for table, column in [(to_table, to_column), (from_table, from_column)]:
if (table, column) not in updated_columns:
logger.info(f"Altering {table}.{column} to VARCHAR(30) COLLATE utf8_bin.")
op.execute(
f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;"
f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;" # noqa: E501
)
updated_columns.add((table, column))

Expand Down
10 changes: 5 additions & 5 deletions alembic/alembic/versions/459323683348_synchronize_identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,25 +113,25 @@ def upgrade() -> None:
logger.info("Fetching existing foreign key constraints.")
constraints = session.execute(
text(
"SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME "
"SELECT refs.CONSTRAINT_NAME, refs.DELETE_RULE, kcu.TABLE_NAME, kcu.COLUMN_NAME, kcu.REFERENCED_TABLE_NAME, kcu.REFERENCED_COLUMN_NAME " # noqa: E501
"FROM information_schema.REFERENTIAL_CONSTRAINTS as refs "
"JOIN information_schema.KEY_COLUMN_USAGE as kcu "
"ON refs.CONSTRAINT_NAME=kcu.CONSTRAINT_NAME "
f"WHERE refs.REFERENCED_TABLE_NAME IN ({', '.join(map(repr, tables_with_referenced_key))});"
f"WHERE refs.REFERENCED_TABLE_NAME IN ({', '.join(map(repr, tables_with_referenced_key))});" # noqa: E501
)
)
constraints = list(constraints)
logger.info(f"Dropping {len(constraints)} foreign key constraints.")
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints: # noqa: B007
op.execute(f"ALTER TABLE {from_table} DROP FOREIGN KEY {constraint}")

updated_columns = set()
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints:
for constraint, delete_rule, from_table, from_column, to_table, to_column in constraints: # noqa: B007
for table, column in [(to_table, to_column), (from_table, from_column)]:
if (table, column) not in updated_columns:
logger.info(f"Altering {table}.{column} to VARCHAR(30) COLLATE utf8_bin.")
op.execute(
f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;"
f"ALTER TABLE {table} CHANGE COLUMN {column} {column} VARCHAR(30) COLLATE utf8_bin;" # noqa: E501
)
updated_columns.add((table, column))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import String

# revision identifiers, used by Alembic.
Expand Down
6 changes: 3 additions & 3 deletions alembic/alembic/versions/8b054cdc9261_create_map_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def upgrade() -> None:
END WHILE;
RETURN result;
END;
"""
""" # noqa: E501
)
)
# We store a map for the old->new identifiers so we can support backwards compatibility (maybe)
Expand All @@ -145,7 +145,7 @@ def upgrade() -> None:
Column("new", String(30), index=True),
)
op.execute(
f"INSERT INTO {map_table} SELECT identifier, CONCAT('{abbreviations[child]}', '_', rand_id()) FROM {child} "
f"INSERT INTO {map_table} SELECT identifier, CONCAT('{abbreviations[child]}', '_', rand_id()) FROM {child} " # noqa: E501
)

for parent in [ai_resource, ai_asset, agent]:
Expand All @@ -157,7 +157,7 @@ def upgrade() -> None:
Column("new", String(30), index=True),
)
child_data = "UNION ".join(
f"SELECT child.{parent.fk_identifier} as parent_identifier, child_map_table.new as new_identifier "
f"SELECT child.{parent.fk_identifier} as parent_identifier, child_map_table.new as new_identifier " # noqa: E501
f"FROM {child_table} as child "
f"JOIN _{child_table}_identifier_map as child_map_table "
f"ON child_map_table.old=child.identifier "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import String

# revision identifiers, used by Alembic.
Expand All @@ -22,15 +21,15 @@

def upgrade() -> None:
op.alter_column(
f"event",
"event",
"registration_link",
type_=String(1800),
)


def downgrade() -> None:
op.alter_column(
f"event",
"event",
"registration_link",
type_=String(256),
)
1 change: 0 additions & 1 deletion alembic/alembic/versions/d09ed8ad4533_add_news_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy import Column, String

from database.model.field_length import LONG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
def upgrade() -> None:
# Migrate existing countries to country table as unofficial
op.execute(
"insert into country(name, definition, official) SELECT distinct(country), '', false from address;"
"insert into country(name, definition, official) SELECT distinct(country), '', false from address;" # noqa: E501
)
# Create new column that references the identifier
op.add_column("address", Column("country_identifier", sa.Integer(), nullable=True))
Expand Down
5 changes: 1 addition & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,12 @@ py-modules = []

[tool.ruff]
line-length = 100
exclude = [
"src/tests",
]

[tool.codespell]
ignore-words-list = "checkin"

[tool.ruff.lint]
select = ["S"]
select = ["S", "B", "C", "E", "F", "T", "W"]

[tool.pytest.ini_options]
filterwarnings = [
Expand Down
13 changes: 5 additions & 8 deletions scripts/migrate_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,26 @@
so can be used to avoid indexing the same dataset twice under a different platform identifier.

To be run once (sometime around Nov 2024); likely not needed after that. See also #385 and #392.
"""
""" # noqa: E501

import logging
import os
import string
from http import HTTPStatus
import time
from pathlib import Path

from sqlalchemy import select
from database.session import DbSession, EngineSingleton
from database.model.dataset.dataset import Dataset
from database.model.platform.platform import Platform
from database.model.platform.platform_names import PlatformName
from database.model.concept.concept import AIoDConcept

# Magic import which triggers ORM setup
import database.setup

import requests
import json

import re
from http import HTTPStatus


def fetch_huggingface_metadata() -> list[dict]:
Expand Down Expand Up @@ -86,9 +82,10 @@ def main():
datasets = session.scalars(datasets_query).all()

logging.info(f"Found {len(datasets)} huggingface datasets.")
is_old_style_identifier = lambda identifier: any(
char not in string.hexdigits for char in identifier
)

def is_old_style_identifier(identifier):
    """Return True if any character of ``identifier`` is not a hexadecimal digit.

    Characters are checked against ``string.hexdigits`` (0-9, a-f, A-F).
    An empty identifier has no offending character and yields False.
    """
    for symbol in identifier:
        if symbol not in string.hexdigits:
            # One non-hex character is enough to decide.
            return True
    return False

datasets = [
dataset
for dataset in datasets
Expand Down
Loading