Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
5708fa4
add DatasetABC CCDLDataset and UserDataset models with their attrs
avrohomgottlieb Jan 7, 2026
f9a7967
add ccdl logic to ccdldataset model
avrohomgottlieb Jan 8, 2026
e96040f
add user logic to userdataset model
avrohomgottlieb Jan 8, 2026
e657f7d
add common dataset logic to datasetabc model
avrohomgottlieb Jan 8, 2026
1ef1550
update reference names on abc to follow django conventions
avrohomgottlieb Jan 8, 2026
e111b4e
add migration for user dataset and ccdl dataset classes
avrohomgottlieb Jan 8, 2026
80189f1
add ccdl and user dataset attrs to job model
avrohomgottlieb Jan 9, 2026
3992909
add populate_datasets hook to migration
avrohomgottlieb Jan 9, 2026
9987862
call data migration functions, improve reversal logic
avrohomgottlieb Jan 9, 2026
51402e5
remove unnecessary reverse function, fix triggers error
avrohomgottlieb Jan 12, 2026
21d61c8
make ccdl name ccdl dataset attr non nullable
avrohomgottlieb Jan 12, 2026
42b2b3c
add test_ccdl_dataset tests, update expected values, readme file, and…
avrohomgottlieb Jan 12, 2026
635a8bf
add test_user_dataset tests and references
avrohomgottlieb Jan 12, 2026
a89b54f
add test_dataset_abcc tests and references
avrohomgottlieb Jan 13, 2026
d51b080
add type hints to readme file
avrohomgottlieb Jan 13, 2026
d4a138f
update create_ccdl_datasets and computed_file references
avrohomgottlieb Jan 13, 2026
560877b
add generic foreign key and generic relation for dataset jobs, add ne…
avrohomgottlieb Jan 15, 2026
2a5a322
add dataset validation to job model, update references
avrohomgottlieb Jan 15, 2026
757797d
rename dataset_id to dataset_object_id in generic relation
avrohomgottlieb Jan 15, 2026
1e2dbd0
update references in test job
avrohomgottlieb Jan 15, 2026
7a117cf
add test which asserts that dataset attr is subtype of DatasetABC
avrohomgottlieb Jan 15, 2026
4a5fede
update references in remaining tests
avrohomgottlieb Jan 15, 2026
622ed53
rename files - ccdl to ccdl_dataset, user to user_dataset
avrohomgottlieb Jan 15, 2026
d2cdd6b
move regenerated_from attr from datasetabc to user_dataset
avrohomgottlieb Jan 15, 2026
b30e55a
add download_filename and download_url defaults to datasetabc, remove…
avrohomgottlieb Jan 15, 2026
036fd2e
rename ccdl dataset methods
avrohomgottlieb Jan 15, 2026
7ed6e3a
remove foreign key relationship between job and dataset models
avrohomgottlieb Jan 15, 2026
e6d0650
add datasetabc type hints
avrohomgottlieb Jan 15, 2026
ab9fbb8
add fallbacks for batch job queue and definition in JobFactory
avrohomgottlieb Jan 15, 2026
e034f1a
update expected_values
avrohomgottlieb Jan 15, 2026
1abb095
move dataset population run python logic from 0080 to 0081
avrohomgottlieb Jan 22, 2026
d211b57
update apply and reverse dataset job functions
avrohomgottlieb Jan 22, 2026
7488520
fix datasets population reverse function
avrohomgottlieb Jan 22, 2026
b64630a
correct assignment logic in apply_dataset_jobs function
avrohomgottlieb Jan 22, 2026
55e3c19
set dataset_content_type field on delete to SET_NULL
avrohomgottlieb Jan 22, 2026
ac6118b
update error validation error message
avrohomgottlieb Jan 22, 2026
194b9f5
add custom job queryset and manager to handle bulk job dataset operat…
avrohomgottlieb Jan 22, 2026
336519f
update ccdl dataset serializer and view
avrohomgottlieb Jan 22, 2026
b293777
update ccdl dataset serializer and view tests
avrohomgottlieb Jan 22, 2026
63e0ad1
update user dataset serializer and dataset view
avrohomgottlieb Jan 22, 2026
503596e
update tests, further update view
avrohomgottlieb Jan 23, 2026
e1c4306
split apply and reverse dataset functions by concrete class
avrohomgottlieb Jan 28, 2026
86c5d64
replace assignment of isCCDL var with ccdl_name attr in exchange for …
avrohomgottlieb Jan 28, 2026
7c940d5
remove start attr from CCDLDatasetSerializer
avrohomgottlieb Jan 28, 2026
55e7496
remove email attr from CCDLDatasetSerializer
avrohomgottlieb Jan 28, 2026
2c56d5e
reduce queries in apply methods
avrohomgottlieb Jan 28, 2026
99e689a
Merge pull request #1782 from AlexsLemonade/dev
avrohomgottlieb Jan 30, 2026
ea4930c
Merge branch 'feature/dataset-abc' into avrohom/1713-dataset-abc-add-…
avrohomgottlieb Jan 30, 2026
aed7b87
propagate change to get_project_modality_samples in dataset to datase…
avrohomgottlieb Jan 30, 2026
b2cb7d5
update expected values
avrohomgottlieb Jan 30, 2026
105000d
Merge branch 'avrohom/1713-dataset-abc-add-new-classes' into avrohom/…
avrohomgottlieb Jan 30, 2026
cf45ab8
Merge pull request #1740 from AlexsLemonade/avrohom/1713-dataset-abc-…
avrohomgottlieb Jan 30, 2026
98a1a39
Merge pull request #1769 from AlexsLemonade/avrohom/1714-dataset-abc-…
avrohomgottlieb Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.template.defaultfilters import pluralize

from scpca_portal.config.logging import get_and_configure_logger
from scpca_portal.models import Dataset, Job
from scpca_portal.models import CCDLDataset, Job

logger = get_and_configure_logger(__name__)

Expand Down Expand Up @@ -44,7 +44,7 @@ def handle(self, *args, **kwargs):
self.create_ccdl_datasets(**kwargs)

def create_ccdl_datasets(self, ignore_hash, retry_failed_jobs, **kwargs) -> None:
created_datasets, updated_datasets = Dataset.create_or_update_ccdl_datasets(
created_datasets, updated_datasets = CCDLDataset.create_or_update_ccdl_datasets(
ignore_hash=ignore_hash
)
if created_datasets:
Expand Down
236 changes: 236 additions & 0 deletions api/scpca_portal/migrations/0080_ccdldataset_userdataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
# Generated by Django 3.2.25 on 2026-01-09 18:35

import uuid

import django.contrib.postgres.fields
import django.db.models.deletion
from django.db import migrations, models


def _shared_dataset_columns():
    """Return fresh ``(name, field)`` pairs for the columns both dataset tables declare alike.

    A new list holding new field instances is built on every call, so the two
    ``CreateModel`` operations in this migration never share a field object.
    The order of the returned pairs is the column order used by both tables.
    """
    columns = [
        ("created_at", models.DateTimeField(auto_now_add=True)),
        ("updated_at", models.DateTimeField(auto_now=True)),
        (
            "id",
            models.UUIDField(
                default=uuid.uuid4, editable=False, primary_key=True, serialize=False
            ),
        ),
        (
            "format",
            models.TextField(
                choices=[
                    ("ANN_DATA", "AnnData"),
                    ("SINGLE_CELL_EXPERIMENT", "Single-cell experiment"),
                    ("METADATA", "Metadata"),
                ]
            ),
        ),
        ("data", models.JSONField(default=dict)),
        ("email", models.EmailField(max_length=254, null=True)),
        ("start", models.BooleanField(default=False)),
    ]

    # Four nullable 32-character hash columns.
    for hash_column in ("data_hash", "metadata_hash", "readme_hash", "combined_hash"):
        columns.append((hash_column, models.CharField(max_length=32, null=True)))

    # Four boolean "includes files" flags, all defaulting to False.
    for flag_column in (
        "includes_files_bulk",
        "includes_files_cite_seq",
        "includes_files_merged",
        "includes_files_multiplexed",
    ):
        columns.append((flag_column, models.BooleanField(default=False)))

    columns.append(("estimated_size_in_bytes", models.BigIntegerField(default=0)))

    # Each state contributes a nullable timestamp followed by a boolean flag;
    # the failed and terminated states additionally carry a nullable reason text.
    state_columns = (
        ("started_at", "is_started", None),
        ("pending_at", "is_pending", None),
        ("processing_at", "is_processing", None),
        ("succeeded_at", "is_succeeded", None),
        ("failed_at", "is_failed", "failed_reason"),
        ("expires_at", "is_expired", None),
        ("terminated_at", "is_terminated", "terminated_reason"),
    )
    for timestamp_column, flag_column, reason_column in state_columns:
        columns.append((timestamp_column, models.DateTimeField(null=True)))
        columns.append((flag_column, models.BooleanField(default=False)))
        if reason_column is not None:
            columns.append((reason_column, models.TextField(null=True)))

    return columns


def _set_null_relation(field_class, target, reverse_name):
    """Build a nullable relation of ``field_class`` to ``target`` that is SET_NULL on delete."""
    return field_class(
        null=True,
        on_delete=django.db.models.deletion.SET_NULL,
        related_name=reverse_name,
        to=target,
    )


def _dataset_options(table_name):
    """Return the model options shared by both tables; only ``db_table`` differs."""
    return {
        "db_table": table_name,
        "ordering": ["updated_at"],
        "get_latest_by": "updated_at",
    }


class Migration(migrations.Migration):
    """Create the ``user_datasets`` and ``ccdl_datasets`` tables."""

    dependencies = [("scpca_portal", "0079_auto_20251119_1503")]

    operations = [
        migrations.CreateModel(
            name="UserDataset",
            fields=[
                *_shared_dataset_columns(),
                # Columns declared only on the user dataset table.
                ("total_sample_count", models.BigIntegerField(default=0)),
                ("diagnoses_summary", models.JSONField(default=dict)),
                ("files_summary", models.JSONField(default=list)),
                ("project_diagnoses", models.JSONField(default=dict)),
                ("project_modality_counts", models.JSONField(default=dict)),
                (
                    "modality_count_mismatch_projects",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                ("project_sample_counts", models.JSONField(default=dict)),
                ("project_titles", models.JSONField(default=dict)),
                # Relations.
                (
                    "computed_file",
                    _set_null_relation(
                        models.OneToOneField, "scpca_portal.computedfile", "userdataset"
                    ),
                ),
                (
                    "download_tokens",
                    models.ManyToManyField(
                        related_name="downloaded_userdataset_set", to="scpca_portal.APIToken"
                    ),
                ),
                (
                    "regenerated_from",
                    _set_null_relation(
                        models.ForeignKey,
                        "scpca_portal.userdataset",
                        "regenerated_userdataset_set",
                    ),
                ),
                (
                    "token",
                    _set_null_relation(
                        models.ForeignKey, "scpca_portal.apitoken", "userdataset_set"
                    ),
                ),
            ],
            options=_dataset_options("user_datasets"),
        ),
        migrations.CreateModel(
            name="CCDLDataset",
            fields=[
                *_shared_dataset_columns(),
                # Columns declared only on the CCDL dataset table.
                (
                    "ccdl_name",
                    models.TextField(
                        choices=[
                            ("ALL_METADATA", "All Metadata"),
                            (
                                "SINGLE_CELL_SINGLE_CELL_EXPERIMENT",
                                "Single Cell Single Cell Experiment",
                            ),
                            (
                                "SINGLE_CELL_SINGLE_CELL_EXPERIMENT_NO_MULTIPLEXED",
                                "Single Cell Single Cell Experiment No Multiplexed",
                            ),
                            (
                                "SINGLE_CELL_SINGLE_CELL_EXPERIMENT_MERGED",
                                "Single Cell Single Cell Experiment Merged",
                            ),
                            ("SINGLE_CELL_ANN_DATA", "Single Cell Ann Data"),
                            ("SINGLE_CELL_ANN_DATA_MERGED", "Single Cell Ann Data Merged"),
                            ("SPATIAL_SPATIAL_SPACERANGER", "Spatial Spatial Spaceranger"),
                        ],
                    ),
                ),
                ("ccdl_project_id", models.TextField(null=True)),
                (
                    "ccdl_modality",
                    models.TextField(
                        choices=[
                            ("BULK_RNA_SEQ", "Bulk RNA-seq"),
                            ("CITE_SEQ", "CITE-seq"),
                            ("MULTIPLEXED", "Multiplexed"),
                            ("SINGLE_CELL", "Single-cell"),
                            ("SPATIAL", "Spatial Data"),
                        ],
                        null=True,
                    ),
                ),
                # Relations.
                (
                    "computed_file",
                    _set_null_relation(
                        models.OneToOneField, "scpca_portal.computedfile", "ccdldataset"
                    ),
                ),
                (
                    "download_tokens",
                    models.ManyToManyField(
                        related_name="downloaded_ccdldataset_set", to="scpca_portal.APIToken"
                    ),
                ),
                (
                    "token",
                    _set_null_relation(
                        models.ForeignKey, "scpca_portal.apitoken", "ccdldataset_set"
                    ),
                ),
            ],
            options=_dataset_options("ccdl_datasets"),
        ),
    ]
Loading