Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f9573f9
Refactor Spectrum model for optimized data storage and migration; rem…
tlambert03 Nov 26, 2025
54acafb
stub spectrum Data
tlambert03 Nov 26, 2025
9c608ac
Add peak_wave field and compute its value during spectrum data migration
tlambert03 Nov 26, 2025
4b5252c
v2 spectrum form
tlambert03 Nov 27, 2025
0892d71
Refactor SpectrumFormV2 validation logic and add end-to-end tests for…
tlambert03 Nov 27, 2025
de5f47b
clean code
tlambert03 Nov 27, 2025
3d7866f
duplicate checking
tlambert03 Nov 27, 2025
aac8f1b
use udsv
tlambert03 Nov 27, 2025
f140831
Enhance spectrum processing logic to conditionally normalize data bas…
tlambert03 Nov 27, 2025
0f023b1
Update status indicator emojis and enhance chart processing for dual …
tlambert03 Nov 27, 2025
250581d
better typing
tlambert03 Nov 27, 2025
c3deedb
Add duplicate spectrum validation in form processing and backend
tlambert03 Nov 27, 2025
d85b186
cleanup
tlambert03 Nov 27, 2025
da3c465
add violation error message
tlambert03 Nov 27, 2025
9778766
Refactor spectrum migration and model constraints for clarity and val…
tlambert03 Nov 27, 2025
89ce32f
cleanup
tlambert03 Nov 27, 2025
355f655
Merge branch 'main' into spectrum-migrate
tlambert03 Nov 27, 2025
81ad530
fix type
tlambert03 Nov 27, 2025
afabccc
more tests
tlambert03 Nov 27, 2025
31853ee
fix remove all
tlambert03 Nov 27, 2025
33f8bef
use new form by default
tlambert03 Nov 28, 2025
e454daf
change links
tlambert03 Nov 28, 2025
aa78409
Add DOI validation and enhance spectrum submission form
tlambert03 Nov 28, 2025
ec21c04
refactor and add test
tlambert03 Nov 28, 2025
611331d
fix capitalization bug
tlambert03 Nov 28, 2025
27e5760
preserve precision
tlambert03 Nov 28, 2025
bbfdda0
feat: Enhance Spectrum Model and Form Functionality
tlambert03 Nov 28, 2025
3f9830e
better parsing
tlambert03 Nov 28, 2025
5742801
final touches
tlambert03 Nov 28, 2025
adb7782
add contact to erro
tlambert03 Nov 28, 2025
fcfbc80
feat: Add unique constraint for spectrum owner and subtype; update UR…
tlambert03 Nov 29, 2025
b28db80
fix: Update ignore patterns for WebKit and Firefox request cancellati…
tlambert03 Nov 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions backend/proteins/extrest/entrez.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import time
from collections.abc import MutableMapping, Sequence
from typing import TYPE_CHECKING, Literal, TypedDict, cast
from urllib.parse import quote

import requests
from Bio import Entrez, SeqIO
from django.core.cache import cache
from habanero import Crossref
Expand Down Expand Up @@ -109,6 +111,22 @@ def _merge_info(dict1: MutableMapping, dict2: MutableMapping, exclude=()) -> Mut
return dict1


def is_valid_doi(doi: str) -> bool:
    """Report whether Crossref knows about *doi*.

    A HEAD request is sent to the Crossref ``works`` endpoint with the DOI
    percent-encoded (slashes included). Only an HTTP 200 response counts as
    valid.

    Args:
        doi: The DOI string to look up.

    Returns:
        True if Crossref responds with status 200. False for an empty DOI, a
        DOI longer than 200 characters, any other status code, or any
        ``requests`` error (timeout, connection failure, ...).
    """
    # Cheap local rejections first, so obviously bad input never hits the network.
    if not doi:
        return False
    if len(doi) > 200:
        return False

    # safe="" makes quote() escape "/" too, keeping the DOI a single path segment.
    url = "https://api.crossref.org/works/" + quote(doi, safe="")
    try:
        response = requests.head(url, timeout=3, allow_redirects=True)
    except requests.RequestException:
        # Network problems are reported as "not valid" rather than raised.
        return False
    return response.status_code == 200


def doi_lookup(doi: str) -> DoiInfo:
info = _crossref(doi)
pmid = _doi2pmid(doi)
Expand Down
5 changes: 5 additions & 0 deletions backend/proteins/forms/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ def clean(self):
return cleaned_data

def save(self, commit=True):
# Set spectrum data from form - data is now a property, not a model field,
# so ModelForm won't automatically set it on the instance
if self.cleaned_data.get("data"):
self.instance.data = self.cleaned_data["data"]

cat = self.cleaned_data.get("category")
if cat == Spectrum.DYE:
# Dyes require special handling: create Dye first, then DyeState
Expand Down
345 changes: 345 additions & 0 deletions backend/proteins/forms/spectrum_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
"""Enhanced spectrum submission form with client-side processing and multi-spectrum support."""

from __future__ import annotations

import json
from typing import TYPE_CHECKING, TypedDict

from django import forms
from django.apps import apps
from django.db import transaction
from django.utils.text import slugify

from proteins.extrest.entrez import is_valid_doi
from proteins.models import Dye, DyeState, FluorState, Spectrum, State
from references.models import Reference

if TYPE_CHECKING:
from django.contrib.auth.models import User


class SpectrumJSONData(TypedDict):
    """Shape of one spectrum entry in the JSON array posted by the frontend.

    Keep this in sync with the SpectrumJSON typedef in form-controller.js.

    The frontend contract is that every key is present; keys annotated with
    ``| None`` may carry a JSON null when they do not apply (for example
    ph/solvent for non-bio categories, scale_factor when not provided,
    peak_wave when not found).

    Note that the server-side validator only checks ``data``, ``category``,
    ``subtype`` and ``owner``; the remaining keys are read with ``.get()``
    when spectra are saved.
    """

    # --- never null -------------------------------------------------------
    data: list[list[float]]  # list of [wavelength, value] pairs
    category: str
    owner: str
    subtype: str
    column_name: str

    # --- present, but may be null -----------------------------------------
    owner_slug: str | None  # Protein slug for autocomplete categories
    scale_factor: float | None
    ph: float | None
    solvent: str | None
    peak_wave: int | None


# Upper bounds enforced by _validate_spectrum_json on every submitted payload.
# Largest number of spectra accepted in a single JSON array.
MAX_SPECTRA_PER_SUBMISSION = 20
# Largest number of [wavelength, value] points accepted for one spectrum.
MAX_DATA_POINTS_PER_SPECTRUM = 2000


def _validate_data_points(label: str, data: object) -> None:
    """Raise ``forms.ValidationError`` unless *data* is a usable point list.

    Args:
        label: Human-readable prefix for error messages, e.g. ``"Spectrum 3"``.
        data: The decoded ``"data"`` value of one spectrum.

    Raises:
        forms.ValidationError: If *data* is not a list of 2 to
            ``MAX_DATA_POINTS_PER_SPECTRUM`` two-element lists of finite numbers.
    """
    import math  # function-scope import: only the finiteness check needs it

    if not isinstance(data, list) or len(data) < 2:
        raise forms.ValidationError(f"{label} must have at least 2 data points.")

    if len(data) > MAX_DATA_POINTS_PER_SPECTRUM:
        raise forms.ValidationError(
            f"{label} has too many data points ({len(data)}). "
            f"Maximum {MAX_DATA_POINTS_PER_SPECTRUM}."
        )

    for j, point in enumerate(data, start=1):
        if not isinstance(point, list) or len(point) != 2:
            raise forms.ValidationError(f"{label}, point {j}: must be [wavelength, value].")
        # bool is a subclass of int, so JSON true/false must be excluded explicitly.
        if any(isinstance(v, bool) or not isinstance(v, (int, float)) for v in point):
            raise forms.ValidationError(f"{label}, point {j}: values must be numbers.")
        # json.loads accepts NaN/Infinity and turns overflowing literals (1e999)
        # into inf; none of those are meaningful spectrum values.
        if any(isinstance(v, float) and not math.isfinite(v) for v in point):
            raise forms.ValidationError(f"{label}, point {j}: values must be finite numbers.")


def _validate_spectrum_json(raw: str | bytes) -> list[SpectrumJSONData]:
    """Decode and validate the JSON array of spectra submitted by the frontend.

    Checks performed, in order: the payload is a non-empty JSON array of at
    most ``MAX_SPECTRA_PER_SUBMISSION`` objects; each object has valid ``data``
    points, a ``category`` in ``Spectrum.CATEGORIES``, a ``subtype`` in
    ``Spectrum.SUBTYPE_CHOICES`` and a non-blank string ``owner``; and no two
    objects share the same (category, owner, subtype), where owner is compared
    stripped and case-insensitively.

    Args:
        raw: The JSON text (or bytes) from the hidden form field.

    Returns:
        The decoded list of spectrum dicts, unmodified.

    Raises:
        forms.ValidationError: On the first failed check. Malformed input of any
            shape is reported this way rather than as an unhandled exception.
    """
    if not raw or raw in ("[]", b"[]"):
        raise forms.ValidationError("No spectrum data provided.")

    try:
        spectra = json.loads(raw)
    except (ValueError, RecursionError) as e:
        # ValueError covers JSONDecodeError and the UnicodeDecodeError raised for
        # undecodable bytes; RecursionError covers absurdly deep nesting.
        raise forms.ValidationError(f"Invalid JSON: {e}") from e

    if not isinstance(spectra, list) or len(spectra) == 0:
        raise forms.ValidationError("Expected a non-empty array of spectra.")

    if len(spectra) > MAX_SPECTRA_PER_SUBMISSION:
        raise forms.ValidationError(
            f"Too many spectra ({len(spectra)}). "
            f"Maximum {MAX_SPECTRA_PER_SUBMISSION} per submission."
        )

    valid_subtypes = dict(Spectrum.SUBTYPE_CHOICES)
    valid_categories = dict(Spectrum.CATEGORIES)

    for i, spec in enumerate(spectra, start=1):
        label = f"Spectrum {i}"
        if not isinstance(spec, dict):
            raise forms.ValidationError(f"{label} is not a valid object.")

        # Validate data
        if "data" not in spec:
            raise forms.ValidationError(f"{label} is missing 'data' field.")
        _validate_data_points(label, spec["data"])

        # Validate category. The isinstance guard keeps unhashable JSON values
        # (lists, objects) from raising TypeError in the dict membership test.
        category = spec.get("category")
        if not category:
            raise forms.ValidationError(f"{label} is missing category.")
        if not isinstance(category, str) or category not in valid_categories:
            raise forms.ValidationError(f"{label} has invalid category: {category}")

        # Validate subtype (same guard as for category)
        subtype = spec.get("subtype")
        if not subtype:
            raise forms.ValidationError(f"{label} is missing subtype.")
        if not isinstance(subtype, str) or subtype not in valid_subtypes:
            raise forms.ValidationError(f"{label} has invalid subtype: {subtype}")

        # Validate owner: must be a string with non-whitespace content.
        # A null or numeric owner would otherwise crash on .strip().
        owner = spec.get("owner")
        if not isinstance(owner, str) or not owner.strip():
            raise forms.ValidationError(f"{label} is missing owner.")

    # Check for duplicate spectra within this submission.
    # Done in a second pass so per-spectrum errors are always reported first.
    # Use (category, owner, subtype) as the unique key.
    seen: dict[tuple[str, str, str], int] = {}
    for i, spec in enumerate(spectra, start=1):
        key = (spec["category"], spec["owner"].strip().lower(), spec["subtype"])
        if key in seen:
            raise forms.ValidationError(
                f"Duplicate spectrum detected: Spectra {seen[key]} and {i} have the same "
                f"owner ({spec['owner']}), category, and subtype ({spec['subtype']})."
            )
        seen[key] = i

    return spectra


class SpectrumFormV2(forms.Form):
    """Enhanced spectrum submission form supporting multi-spectrum file uploads.

    This form handles client-side processing of spectrum data. The JavaScript frontend
    parses CSV/TSV files, allows column selection, normalizes data, and sends processed
    spectra as JSON with per-spectrum metadata (category, owner, subtype, etc.).

    Construct with an optional ``user=`` keyword; that user is recorded as the
    creator/updater of every object ``save()`` touches, and staff users get
    their spectra approved immediately.
    """

    # Lookup for non-protein, non-dye categories (filter/camera/light):
    # category -> (name of the owner slot it fills, model name in the "proteins" app)
    OWNER_LOOKUP = {
        Spectrum.FILTER: ("owner_filter", "Filter"),
        Spectrum.CAMERA: ("owner_camera", "Camera"),
        Spectrum.LIGHT: ("owner_light", "Light"),
    }

    # Hidden field containing JSON array of processed spectra from JavaScript
    # Structure: [{ "data": [[wave, value]...], "category": "p", "owner": "EGFP",
    #               "subtype": "ex", "peak_wave": 488, ... }, ...]
    spectra_json = forms.CharField(
        widget=forms.HiddenInput(),
        required=True,
        error_messages={"required": "Please upload a file and configure your spectra."},
    )

    # File upload field (for initial parsing by JavaScript - not required on POST)
    file = forms.FileField(
        required=False,
        label="Spectrum File",
        help_text="Upload CSV or TSV file.",
    )

    # Shared source fields
    source = forms.CharField(
        max_length=200,
        required=False,
        label="Source",
        help_text="Citation or source of the data",
    )

    primary_reference = forms.CharField(
        max_length=200,
        required=False,
        label="Primary Reference (DOI)",
        help_text="Enter a valid DOI (e.g., 10.1234/example)",
    )

    # Confirmation checkbox
    confirmation = forms.BooleanField(
        required=True,
        label="I confirm the validity of this data",
    )

    def __init__(self, *args, **kwargs):
        """Pop the optional ``user`` keyword before delegating to ``forms.Form``."""
        self.user: User | None = kwargs.pop("user", None)
        super().__init__(*args, **kwargs)

    def clean_spectra_json(self) -> list[SpectrumJSONData]:
        """Parse and validate the JSON array of processed spectra."""
        raw = self.cleaned_data.get("spectra_json", "")
        return _validate_spectrum_json(raw)

    def clean_primary_reference(self) -> str:
        """Validate that the DOI is resolvable if provided.

        Returns:
            The stripped DOI, or an empty string when none was entered.

        Raises:
            forms.ValidationError: If a DOI was entered but ``is_valid_doi``
                rejects it.
        """
        doi = self.cleaned_data.get("primary_reference", "").strip()
        if doi and not is_valid_doi(doi):
            raise forms.ValidationError(
                f"Could not find a reference for DOI: {doi}. Please check that it is correct."
            )

        return doi

    def clean(self):
        """Validate that at least one of source or primary_reference is provided."""
        cleaned_data = super().clean()
        source = cleaned_data.get("source", "").strip()
        reference = cleaned_data.get("primary_reference", "").strip()

        # Check if user attempted to provide a reference (even if it failed validation)
        # by looking at the raw data, not just cleaned_data. This avoids stacking a
        # second "provide a source" error on top of the DOI field's own error.
        attempted_reference = self.data.get("primary_reference", "").strip()

        if not (source or reference or attempted_reference):
            raise forms.ValidationError(
                "Please provide at least one of Source or Primary Reference."
            )

        return cleaned_data

    def _get_protein_state(self, owner_name: str, owner_slug: str | None):
        """Return a State of the protein identified by *owner_slug*.

        Raises:
            forms.ValidationError: If no slug was supplied or no State matches it.
        """
        # For proteins, owner_slug is the Protein.slug from Select2 autocomplete
        if not owner_slug:
            raise forms.ValidationError(
                f"Protein '{owner_name}' must be selected from the autocomplete dropdown."
            )
        # When a protein has several states, the first one in queryset order is used.
        # NOTE(review): that is not necessarily the protein's default state - confirm.
        state = State.objects.select_related("protein").filter(protein__slug=owner_slug).first()
        if state is None:
            raise forms.ValidationError(f"Protein not found: {owner_name}")
        return state

    def _get_or_create_dye_state(self, owner_name: str):
        """Return the default-named DyeState of the Dye slugged from *owner_name*."""
        dye, created = Dye.objects.get_or_create(
            slug=slugify(owner_name),
            defaults={"name": owner_name, "created_by": self.user},
        )
        if not created and self.user:
            # Record who last touched an existing dye.
            dye.updated_by = self.user
            dye.save()

        dye_state, _ = DyeState.objects.get_or_create(
            dye=dye,
            name=FluorState.DEFAULT_NAME,
            defaults={"created_by": self.user},
        )
        return dye_state

    def _get_or_create_named_owner(self, model_name: str, owner_name: str):
        """Return the ``proteins.<model_name>`` row named *owner_name*, creating it if needed."""
        owner_model = apps.get_model("proteins", model_name)
        owner_obj, created = owner_model.objects.get_or_create(
            name=owner_name,
            defaults={"created_by": self.user},
        )
        if not created and self.user:
            owner_obj.updated_by = self.user
            owner_obj.save()
        return owner_obj

    def _get_or_create_owner(self, category: str, owner_name: str, owner_slug: str | None = None):
        """Get or create owner objects based on category.

        Args:
            category: The spectrum category (protein, dye, filter, etc.)
            owner_name: Display name of the owner
            owner_slug: For proteins, this is the Protein.slug from Select2 autocomplete

        Returns:
            Tuple of (owner_fluor, owner_filter, owner_camera, owner_light). At most
            one entry is set; all are None for a category handled by no branch.

        Raises:
            forms.ValidationError: For a protein with a missing or unknown slug.
        """
        owners = dict.fromkeys(("owner_fluor", "owner_filter", "owner_camera", "owner_light"))

        if category == Spectrum.PROTEIN:
            owners["owner_fluor"] = self._get_protein_state(owner_name, owner_slug)
        elif category == Spectrum.DYE:
            owners["owner_fluor"] = self._get_or_create_dye_state(owner_name)
        elif category in self.OWNER_LOOKUP:
            slot, model_name = self.OWNER_LOOKUP[category]
            owners[slot] = self._get_or_create_named_owner(model_name, owner_name)

        # dict preserves insertion order, so this matches the documented tuple order.
        return tuple(owners.values())

    @transaction.atomic
    def save(self) -> list[Spectrum]:
        """Create Spectrum objects for each processed spectrum.

        Runs in a single transaction, so an exception on any spectrum rolls back
        every object created for this submission.

        Returns:
            List of created Spectrum objects.

        Raises:
            forms.ValidationError: If a protein owner cannot be resolved.
            django.core.exceptions.ValidationError: If ``Spectrum.full_clean()``
                rejects a spectrum.
        """
        spectra_data = self.cleaned_data["spectra_json"]
        source = self.cleaned_data.get("source", "")

        # Convert DOI string to Reference instance if provided
        reference_doi = self.cleaned_data.get("primary_reference", "").strip()
        reference = None
        if reference_doi:
            reference, _ = Reference.objects.get_or_create(doi=reference_doi)

        # Staff submissions skip moderation; the status is the same for every spectrum.
        if self.user and self.user.is_staff:
            status = Spectrum.STATUS.approved
        else:
            status = Spectrum.STATUS.pending

        created_spectra = []

        for spec_data in spectra_data:
            category = spec_data["category"]
            # Validation de-duplicates on the stripped owner name, so use the stripped
            # name here too; otherwise " Foo " and "Foo" would create separate owners.
            owner_name = spec_data["owner"].strip()
            owner_slug = spec_data.get("owner_slug")

            owner_fluor, owner_filter, owner_camera, owner_light = self._get_or_create_owner(
                category, owner_name, owner_slug
            )

            spectrum = Spectrum(
                category=category,
                subtype=spec_data["subtype"],
                owner_fluor=owner_fluor,
                owner_filter=owner_filter,
                owner_camera=owner_camera,
                owner_light=owner_light,
                ph=spec_data.get("ph"),
                solvent=spec_data.get("solvent") or "",
                source=source,
                reference=reference,
                created_by=self.user,
                status=status,
            )

            # Set data (handles normalization)
            spectrum.data = spec_data["data"]

            # Override computed values if provided. These must come after the data
            # assignment so the client-supplied values win.
            if spec_data.get("peak_wave"):
                spectrum.peak_wave = spec_data["peak_wave"]
            if spec_data.get("scale_factor"):
                spectrum.scale_factor = spec_data["scale_factor"]

            spectrum.full_clean()
            spectrum.save()
            created_spectra.append(spectrum)

        return created_spectra
Loading
Loading