Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ repos:

- repo: https://github.com/astral-sh/ruff-pre-commit
# Linter and formatter for Python code.
rev: v0.14.14
rev: v0.15.1
hooks:
# Run the linter.
# Automatically fix issues where possible.
Expand All @@ -51,3 +51,9 @@ repos:
hooks:
- id: bandit
args: ["--exclude", "tests"]

- repo: https://github.com/crate-ci/typos
# Source code spell checker
rev: v1.43.4
hooks:
- id: typos
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ dependencies = [

[dependency-groups]
dev = [
"bandit>=1.9.2",
"bandit>=1.9.3",
"jupyterlab>=4.4.9",
"matplotlib>=3.10.1",
"plotly>=6.0.0",
"prek>=0.2.29",
"prek>=0.3.2",
"pysankeybeta>=1.4.2",
"pytest>=9.0.2",
"ruff>=0.11.0",
"ruff>=0.15.1",
"upsetplot>=0.9.0",
"watermark>=2.5.0",
]
Expand Down
58 changes: 30 additions & 28 deletions ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,41 @@ docstring-code-line-length = 72
[lint]
preview = true # Allow preview rules.
extend-select = [
"F", # pyflakes
"E", # Pycodestyle errors
"W", # Pycodestyle warnings
"I", # isort - import ordering
"A", # Flake8-builtins - misuse of Python built-in names
"B", # bugbear
"BLE", # Flake8-blind-except - flags bare excepts
"C4", # Flake8-comprehensions - best practices in comprehensions
"C90", # McCabe - code complexity metric for functions
"N", # PEP8 Naming
"COM", # Flake8-commas - trailing/comma issues
"D", # Pydocstyle - docstring formatting
"DOC", # Pydoclint - docstring linting and consistency
"E", # Pycodestyle errors
"EM", # Flake8-errmsg - error message style
"F", # pyflakes
"FAST", # FastAPI - FastAPI-specific linting rules
"FBT", # Flake8-boolean-trap - potential pitfalls with booleans
"FURB", # Refurb - rules for code refurbishment
"G", # Flake8-logging-format - logging format string issues
"I", # isort - import ordering
"ICN", # Flake8-import-conventions - enforces conventional import aliases
"LOG", # Flake8-logging - proper logging usage
"N", # PEP8 Naming
"NPY", # NumPy-specific rules - ensures NumPy coding standards
"PD", # Pandas-vet - checks pandas-specific code practices
"PERF", # Perflint - performance-related checks
"PL", # Pylint rules
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The configuration enables "PL" (Pylint rules) on line 33 but then ignores "PLR" (Pylint recommendations) on line 45. This is contradictory because "PLR" is a subset of "PL". Either remove "PL" from the extended select list, or remove "PLR" from the ignore list to maintain consistency.

Copilot uses AI. Check for mistakes.
"PT", # Flake8-pytest-style - pytest best practices
"Q", # Flake8-quotes - enforces quote style consistency
"RUF", # Ruff-specific rules - additional Ruff checks
"S", # Flake8-bandit - security issues
"SIM", # Flake8-simplify - code simplification hints
"UP", # Pyupgrade - upgraded syntax to newer Python versions
"S", # Flake8-bandit – security issues
"BLE", # Flake8-blind-except – flags bare excepts
"FBT", # Flake8-boolean-trap – potential pitfalls with booleans
"A", # Flake8-builtins – misuse of Python built-in names
"COM", # Flake8-commas – trailing/comma issues
"C4", # Flake8-comprehensions – best practices in comprehensions
"EM", # Flake8-errmsg – error message style
"ICN", # Flake8-import-conventions – enforces conventional import aliases
"LOG", # Flake8-logging – proper logging usage
"G", # Flake8-logging-format – logging format string issues
"PT", # Flake8-pytest-style – pytest best practices
"Q", # Flake8-quotes – enforces quote style consistency
"SIM", # Flake8-simplify – code simplification hints
"PD", # Pandas-vet – checks pandas-specific code practices
"NPY", # NumPy-specific rules – ensures NumPy coding standards
"FAST", # FastAPI – FastAPI-specific linting rules
"PERF", # Perflint – performance-related checks
"FURB", # Refurb – rules for code refurbishment
"DOC", # Pydoclint – docstring linting and consistency
"RUF", # Ruff-specific rules – additional Ruff checks
"W", # Pycodestyle warnings
]
ignore = [
"COM812", # Redundant with ruff formatter. See: https://docs.astral.sh/ruff/rules/missing-trailing-comma/
"G004", # f-strings are allowed with the loguru module. See https://docs.astral.sh/ruff/rules/logging-f-string/
"G004", # f-strings are allowed with the loguru module. See https://docs.astral.sh/ruff/rules/logging-f-string/
"PLR", # No Pylint refactor (PLR) rules; the other Pylint groups (PLC, PLE, PLW) stay enabled
]

# Force numpy-style for docstrings
Expand All @@ -53,6 +55,6 @@ convention = "numpy"
"tests/**/*.py" = [
# at least these three should be fine in tests:
"S101", # asserts allowed in tests...
"ARG", # Unused function args -> fixtures nevertheless are functionally relevant...
"FBT", # Don't care about booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize()
"ARG", # Unused function args -> fixtures nevertheless are functionally relevant...
"FBT", # Don't care about booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize()
]
12 changes: 10 additions & 2 deletions src/mdverse_scrapers/scrapers/mddb.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"base_url": "https://cineca.mddbr.eu",
},
}
MAX_NUMBER_OF_DATASETS_IN_DEBUG = 100


def scrape_all_datasets(
Expand Down Expand Up @@ -140,8 +141,15 @@ def scrape_all_datasets(
logger.debug("First dataset metadata on this page:")
logger.debug(datasets[0] if datasets else "No datasets on this page")

if scraper and scraper.is_in_debug_mode and len(all_datasets) >= 100:
logger.warning("Debug mode is ON: stopping after 100 datasets.")
if (
scraper
and scraper.is_in_debug_mode
and len(all_datasets) >= MAX_NUMBER_OF_DATASETS_IN_DEBUG
):
logger.warning(
"Debug mode is ON: "
f"stopping after {MAX_NUMBER_OF_DATASETS_IN_DEBUG} datasets."
)
return all_datasets

logger.success(f"Scraped {len(all_datasets):,} datasets in MDposit.")
Expand Down
Loading
Loading