Skip to content

Commit bd5b5bd

Browse files
committed
filled with deets, not yet tested
1 parent 2acae8e commit bd5b5bd

File tree

11 files changed

+818
-2
lines changed

11 files changed

+818
-2
lines changed

README.md

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,67 @@
1-
# openbis_mouse_uploader
2-
Small uploader utility with CLI for uploading MOUSE pre-processed and processed data to OpenBIS, linked to projects, samples, people.
1+
# MOUSE OpenBIS Uploader
2+
3+
A small CLI tool to upload MOUSE SAXS measurement batches to OpenBIS based on a **YMD** day code
4+
(e.g. `20251220`). It reads the measurement metadata from the Excel logbook and uploads/updates
5+
OpenBIS objects and datasets (RAW and PROCESSED).
6+
7+
## Install (editable)
8+
9+
```bash
10+
python -m venv .venv
11+
source .venv/bin/activate
12+
pip install -U pip
13+
pip install -e .
14+
```
15+
16+
## Token-based authentication
17+
18+
The CLI uses a token stored in a file (default: `~/.datastore_token`).
19+
20+
- Make sure the token file exists and contains a non-empty token string.
21+
- Override the path via `--datastore-token-path`.
22+
23+
## Usage
24+
25+
Minimal (only required argument is the YMD code):
26+
27+
```bash
28+
mouse-uploader 20251220
29+
```
30+
31+
More verbose logging:
32+
33+
```bash
34+
mouse-uploader 20251220 --log-level DEBUG
35+
```
36+
37+
Continue from a later row in the logbook:
38+
39+
```bash
40+
mouse-uploader 20251220 --start-row 50
41+
```
42+
43+
Dry-run (no writes; logs intended actions):
44+
45+
```bash
46+
mouse-uploader 20251220 --dry-run
47+
```
48+
49+
## Notes on behavior
50+
51+
- **People collections are grouped by proposal year**: `PEOPLE_<proposal-year>`.
52+
- **Duplicate objects should not exist**. If they do, the uploader logs a warning and deletes
53+
duplicates permanently until a single object remains.
54+
- If the project-leader BAM record is not found, the `project_leader_bam` property is **omitted**.
55+
56+
## Development
57+
58+
- Formatting/linting: `ruff`
59+
- Typing: `mypy` (configured to ignore missing third-party stubs)
60+
61+
Example:
62+
63+
```bash
64+
ruff check .
65+
mypy src
66+
pytest
67+
```

pyproject.toml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
[build-system]
2+
requires = ["setuptools>=68", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "mouse-openbis-uploader"
7+
version = "0.1.0"
8+
description = "CLI tool to upload MOUSE SAXS measurement batches to OpenBIS using a YMD filter."
9+
readme = "README.md"
10+
requires-python = ">=3.11"
11+
license = {text = "BSD-3-Clause"}
12+
authors = [
13+
{name = "BAM MOUSE team"}
14+
]
15+
dependencies = [
16+
"attrs>=23.2.0",
17+
"pybis>=1.36.0",
18+
"logbook2mouse",
19+
"jupyter-analysis-tools",
20+
]
21+
22+
[project.urls]
23+
Repository = "https://example.invalid/replace-me"
24+
25+
[project.scripts]
26+
openbis-mouse-uploader = "mouse_openbis_uploader.cli:main"
27+
28+
[tool.setuptools]
29+
package-dir = {"" = "src"}
30+
31+
[tool.setuptools.packages.find]
32+
where = ["src"]
33+
34+
[tool.ruff]
35+
line-length = 100
36+
target-version = "py311"
37+
38+
[tool.ruff.lint]
39+
select = ["E", "F", "I", "UP", "B"]
40+
41+
[tool.mypy]
42+
python_version = "3.11"
43+
warn_return_any = true
44+
warn_unused_configs = true
45+
disallow_untyped_defs = false
46+
ignore_missing_imports = true
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""MOUSE OpenBIS Uploader package."""
2+
3+
# Package version; pyproject.toml declares the same value under [project].version,
# so update both together.
__version__ = "0.1.0"

# Public names exported by ``from mouse_openbis_uploader import *``.
__all__ = ["__version__"]

src/mouse_openbis_uploader/cli.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import logging
5+
from pathlib import Path
6+
from typing import Optional, Sequence
7+
8+
from pybis import Openbis
9+
10+
from logbook2mouse.logbook_reader import Logbook2MouseReader
11+
12+
from .config import UploadConfig
13+
from .logging_utils import setup_logger
14+
from .uploader import OpenBISUploader
15+
from .utils import read_token, validate_ymd
16+
from .failures import FailureRecorder
17+
18+
19+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the uploader.

    Option defaults are read from an ``UploadConfig`` instance so that the CLI
    and the configuration class share a single set of default values.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    # ``ymd_filter`` has no default on UploadConfig, so a bare ``UploadConfig()``
    # raises TypeError before any argument is parsed. The placeholder is never
    # used as a real value: the day code always comes from the required
    # positional ``ymd`` argument below.
    defaults = UploadConfig(ymd_filter="")

    # NOTE(review): pyproject.toml registers the console script as
    # ``openbis-mouse-uploader`` while ``prog`` (and the README) say
    # ``mouse-uploader`` -- confirm which name is intended.
    parser = argparse.ArgumentParser(
        prog="mouse-uploader",
        description="Upload MOUSE measurement batches to OpenBIS using a YMD filter.",
    )

    parser.add_argument("ymd", type=validate_ymd, help="Measurement day code, e.g. 20251220")

    # Identity and input locations.
    parser.add_argument(
        "--ds-username",
        default=defaults.ds_username,
        help=f"Username label for logging (default: {defaults.ds_username})",
    )
    parser.add_argument(
        "--logbook-path",
        type=Path,
        default=defaults.logbook_path,
        help=f"Excel logbook (default: {defaults.logbook_path})",
    )
    parser.add_argument(
        "--proposal-base-path",
        type=Path,
        default=defaults.proposal_base_path,
        help=f"Proposal base path (default: {defaults.proposal_base_path})",
    )
    parser.add_argument(
        "--base-data-path",
        type=Path,
        default=defaults.base_data_path,
        help=f"Base data path (default: {defaults.base_data_path})",
    )
    parser.add_argument(
        "--datastore-token-path",
        type=Path,
        default=defaults.datastore_token_path,
        help=f"Token file path (default: {defaults.datastore_token_path})",
    )

    # OpenBIS layout and logbook position.
    parser.add_argument(
        "--space-name",
        default=defaults.space_name,
        help=f"Space name (default: {defaults.space_name})",
    )
    parser.add_argument(
        "--projects-prepend",
        default=defaults.projects_prepend,
        help=f"Project prefix (default: {defaults.projects_prepend})",
    )
    parser.add_argument(
        "--start-row",
        type=int,
        default=defaults.start_row,
        help=f"Start row index (default: {defaults.start_row})",
    )

    # Server and pacing.
    parser.add_argument(
        "--server-url",
        default=defaults.server_url,
        help=f"OpenBIS URL (default: {defaults.server_url})",
    )
    parser.add_argument(
        "--sleep-seconds-between-ops",
        type=float,
        default=defaults.sleep_seconds_between_ops,
        help=f"Sleep between ops (default: {defaults.sleep_seconds_between_ops})",
    )
    parser.add_argument(
        "--sleep-seconds-between-datasets",
        type=float,
        default=defaults.sleep_seconds_between_datasets,
        help=f"Sleep between datasets (default: {defaults.sleep_seconds_between_datasets})",
    )

    # Naming patterns.
    parser.add_argument(
        "--instrument-name-pattern",
        default=defaults.instrument_name_pattern,
        help=f"Instrument name pattern (default: {defaults.instrument_name_pattern})",
    )
    parser.add_argument(
        "--people-collection-prefix",
        default=defaults.people_collection_prefix,
        help=f"People collection prefix (default: {defaults.people_collection_prefix})",
    )

    # Dataset types and output files.
    parser.add_argument(
        "--raw-dataset-type",
        default=defaults.raw_dataset_type,
        help=f"Raw dataset type (default: {defaults.raw_dataset_type})",
    )
    parser.add_argument(
        "--processed-dataset-type",
        default=defaults.processed_dataset_type,
        help=f"Processed dataset type (default: {defaults.processed_dataset_type})",
    )
    parser.add_argument(
        "--log-file",
        type=Path,
        default=None,
        help="Log file path (default: None, logs to stdout only)",
    )
    parser.add_argument(
        "--failure-file",
        type=Path,
        default=Path("upload_failures.jsonl"),
        help="Failure records file (default: upload_failures.jsonl)",
    )

    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Logging verbosity",
    )
    parser.add_argument("--dry-run", action="store_true", help="No writes; log intended actions")

    return parser
59+
60+
61+
def _validate_args(args: argparse.Namespace) -> None:
    """Reject negative numeric options and missing input paths.

    The checks run in a fixed order (numeric options first, then paths) and the
    first violation aborts.

    Args:
        args: Parsed command-line namespace produced by the uploader's parser.

    Raises:
        SystemExit: With a message naming the offending option or path.
    """
    numeric_options = (
        ("--start-row", "start_row"),
        ("--sleep-seconds-between-ops", "sleep_seconds_between_ops"),
        ("--sleep-seconds-between-datasets", "sleep_seconds_between_datasets"),
    )
    for flag, attr in numeric_options:
        if getattr(args, attr) < 0:
            raise SystemExit(f"{flag} must be >= 0")

    # (namespace attribute, Path predicate that must hold, message when it does not)
    path_checks = (
        ("logbook_path", "is_file", "Logbook file not found"),
        ("proposal_base_path", "exists", "Proposal base path does not exist"),
        ("base_data_path", "exists", "Base data path does not exist"),
        ("datastore_token_path", "is_file", "Token file not found"),
    )
    for attr, predicate, problem in path_checks:
        location = getattr(args, attr)
        if not getattr(location, predicate)():
            raise SystemExit(f"{problem}: {location}")
77+
78+
79+
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the uploader command-line interface.

    Parses and validates the arguments, builds the ``UploadConfig``, opens the
    OpenBIS session from the token file, loads the logbook and hands the entries
    to ``OpenBISUploader``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` once ``process_entries`` has returned.

    Raises:
        SystemExit: From argparse on invalid usage, or from ``_validate_args``
            when an option value or input path is rejected.
    """
    args = build_parser().parse_args(argv)
    _validate_args(args)

    # Configure logging exactly once, with the file target included up front.
    # setup_logger only attaches handlers while the logger has none, so an
    # earlier call without ``log_file`` would leave --log-file without effect.
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logger = setup_logger(level=log_level, log_file=args.log_file)

    cfg = UploadConfig(
        ymd_filter=args.ymd,
        ds_username=args.ds_username,
        logbook_path=args.logbook_path,
        proposal_base_path=args.proposal_base_path,
        base_data_path=args.base_data_path,
        datastore_token_path=args.datastore_token_path,
        space_name=args.space_name,
        projects_prepend=args.projects_prepend,
        start_row=args.start_row,
        server_url=args.server_url,
        sleep_seconds_between_ops=args.sleep_seconds_between_ops,
        sleep_seconds_between_datasets=args.sleep_seconds_between_datasets,
        instrument_name_pattern=args.instrument_name_pattern,
        people_collection_prefix=args.people_collection_prefix,
        raw_dataset_type=args.raw_dataset_type,
        processed_dataset_type=args.processed_dataset_type,
    )

    token = read_token(cfg.datastore_token_path)
    ds = Openbis(url=cfg.server_url, verify_certificates=True)
    ds.set_token(token)
    logger.info("Connected to OpenBIS at %s as %s", cfg.server_url, cfg.ds_username)

    reader = Logbook2MouseReader(
        cfg.logbook_path,
        project_base_path=cfg.proposal_base_path,
        load_all=True,
    )

    failure_recorder = FailureRecorder(args.failure_file)

    uploader = OpenBISUploader(
        ds=ds,
        config=cfg,
        logger=logger,
        dry_run=args.dry_run,
        failure_recorder=failure_recorder,
    )
    uploader.process_entries(reader)
    logger.info(
        "Upload run completed. Failures recorded (if any) to: %s", args.failure_file
    )
    return 0
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from __future__ import annotations
2+
3+
from pathlib import Path
4+
5+
from attrs import define
6+
7+
8+
@define(frozen=True, slots=True)
class UploadConfig:
    """Immutable settings describing one upload of a measurement batch to OpenBIS.

    Only ``ymd_filter`` is mandatory; every other field carries a default.
    """

    # --- required -----------------------------------------------------------
    # Measurement day code used as the filter, e.g. "20251220".
    ymd_filter: str

    # --- identity -----------------------------------------------------------
    ds_username: str = "bpauw"

    # --- input locations ----------------------------------------------------
    logbook_path: Path = Path(
        "/mnt/vsi-db/Measurements/SAXS002/logbooks/Logbook_MOUSE_Dataprocessing.xlsx"
    )
    proposal_base_path: Path = Path("/mnt/vsi-db/Proposals/SAXS002/")
    base_data_path: Path = Path("/mnt/vsi-db/Measurements/SAXS002/data")
    # Evaluated once, when the class body is executed (home of the importing user).
    datastore_token_path: Path = Path.home() / ".datastore_token"

    # --- OpenBIS layout -----------------------------------------------------
    space_name: str = "6.5_PROJECTS"
    projects_prepend: str = "MOUSE_PROJECTS_"

    # --- logbook position ---------------------------------------------------
    start_row: int = 1

    # --- server and pacing --------------------------------------------------
    server_url: str = "https://main.datastore.bam.de"
    sleep_seconds_between_ops: float = 0.5
    sleep_seconds_between_datasets: float = 1.0

    # --- naming patterns ----------------------------------------------------
    instrument_name_pattern: str = "MOUSE*"
    people_collection_prefix: str = "PEOPLE_"

    # --- dataset types ------------------------------------------------------
    raw_dataset_type: str = "RAW_DATA"
    processed_dataset_type: str = "PROCESSED_DATA"
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from dataclasses import dataclass
5+
from pathlib import Path
6+
from typing import Any, Optional
7+
8+
9+
@dataclass(slots=True)
10+
class FailureRecord:
11+
stage: str # e.g. "upsert.PROJECT", "upload.RAW_DATA"
12+
ymd: str
13+
batchnum: str
14+
proposal: str
15+
identifier: Optional[str] # openbis identifier if known
16+
message: str
17+
extra: dict[str, Any]
18+
19+
20+
class FailureRecorder:
    """Append failure records to a JSON Lines file, one JSON object per line.

    The file is opened in append mode for each record and nothing is kept in
    memory between calls, so memory use stays flat over large batch runs.
    """

    def __init__(self, path: Path) -> None:
        """Store the target file path and create its parent directory if missing.

        Args:
            path: Location of the ``.jsonl`` file that records are appended to.
        """
        self.path = path
        self.path.parent.mkdir(parents=True, exist_ok=True)

    def record(self, rec: FailureRecord) -> None:
        """Append ``rec`` to the failure file as a single JSON line.

        Args:
            rec: The failure to persist.
        """
        payload = {
            "stage": rec.stage,
            "ymd": rec.ymd,
            "batchnum": rec.batchnum,
            "proposal": rec.proposal,
            "identifier": rec.identifier,
            "message": rec.message,
            "extra": rec.extra,
        }
        # ``extra`` is free-form and may hold values json cannot encode natively
        # (paths, exceptions, timestamps, ...). Falling back to ``str`` keeps the
        # act of recording a failure from raising TypeError and aborting the run.
        line = json.dumps(payload, ensure_ascii=False, default=str)
        with self.path.open("a", encoding="utf-8") as f:
            f.write(line + "\n")
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import sys
5+
from pathlib import Path
6+
7+
8+
def setup_logger(
    name: str = "openbis_upload_MOUSE",
    level: int = logging.INFO,
    *,
    log_file: Path | None = None,
) -> logging.Logger:
    """Return the named logger, configured for stdout and optionally a log file.

    The function is safe to call repeatedly for the same ``name``:

    * a stdout handler is attached only when the logger has no handlers yet;
    * a file handler for ``log_file`` is attached whenever one is requested and
      no file handler for that same path is attached already, so a later call
      can add file logging to a logger that was first set up without it.

    Args:
        name: Name passed to ``logging.getLogger``.
        level: Level applied to the logger on every call.
        log_file: Optional file to log to; its parent directory is created
            if missing.

    Returns:
        The configured ``logging.Logger``.
    """
    import os  # local import: only needed to normalise the log-file path

    logger = logging.getLogger(name)
    fmt = logging.Formatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s: %(message)s",
        datefmt="%Y%m%dT%H:%M:%S",
    )

    if not logger.handlers:
        sh = logging.StreamHandler(stream=sys.stdout)
        sh.setFormatter(fmt)
        logger.addHandler(sh)

    if log_file is not None:
        # FileHandler stores its target as an absolute path in ``baseFilename``;
        # compare against that to avoid attaching the same file twice.
        target = os.path.abspath(log_file)
        already_attached = any(
            isinstance(h, logging.FileHandler) and h.baseFilename == target
            for h in logger.handlers
        )
        if not already_attached:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            fh = logging.FileHandler(log_file, encoding="utf-8")
            fh.setFormatter(fmt)
            logger.addHandler(fh)

    logger.setLevel(level)
    return logger
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
5+
from pybis import Openbis
6+
from logbook2mouse.logbook_reader import Logbook2MouseReader
7+
8+
from .config import UploadConfig
9+
from .logging_utils import setup_logger
10+
from .uploader import OpenBISUploader
11+
from .utils import read_token
12+
13+
14+
def run_upload(
    config: UploadConfig,
    *,
    log_level: int = logging.INFO,
    dry_run: bool = False,
) -> None:
    """Run one upload from Python code instead of the CLI (e.g. from a notebook).

    Args:
        config: Settings for the run (paths, server URL, token file, ...).
        log_level: Level handed to ``setup_logger``.
        dry_run: Forwarded unchanged to ``OpenBISUploader``.
    """
    log = setup_logger(level=log_level)

    # Load the logbook first, then open the OpenBIS session.
    logbook = Logbook2MouseReader(
        config.logbook_path,
        project_base_path=config.proposal_base_path,
        load_all=True,
    )

    session_token = read_token(config.datastore_token_path)
    openbis = Openbis(url=config.server_url, verify_certificates=True)
    openbis.set_token(session_token)
    log.info("Connected to OpenBIS at %s as %s", config.server_url, config.ds_username)

    OpenBISUploader(
        ds=openbis,
        config=config,
        logger=log,
        dry_run=dry_run,
    ).process_entries(logbook)

0 commit comments

Comments
 (0)