Skip to content
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
78ca86d
add upload script
ascibisz Oct 22, 2025
78b9b0a
add example data and more documentation
ascibisz Oct 22, 2025
a9b056b
point to correct collection
ascibisz Oct 22, 2025
f5f7a69
have server accept recipe as json object in body of request
ascibisz Oct 22, 2025
f87915a
update documentation
ascibisz Oct 22, 2025
1f2d2e3
remove accidential dockerfile changes
ascibisz Oct 22, 2025
bd8ec42
rename param json_recipe
ascibisz Oct 23, 2025
358158e
remove file that shouldn't be in this PR
ascibisz Jan 9, 2026
f0beaa1
remove accidential file
ascibisz Jan 9, 2026
a54ffa1
lint fixes
ascibisz Jan 9, 2026
3d01db3
refactor to try to improve clarity of json recipe vs file path
ascibisz Jan 21, 2026
529e15b
lint fixes
ascibisz Jan 21, 2026
63514c9
lint fix
ascibisz Jan 21, 2026
b2440cd
minimize changeset
ascibisz Jan 21, 2026
470e3a1
minimize changeset
ascibisz Jan 21, 2026
8a34898
simplify changeset
ascibisz Jan 21, 2026
45d438a
code cleanup
ascibisz Jan 22, 2026
c8fe120
minimize changeset
ascibisz Jan 22, 2026
ecc645d
remove trailing comma
ascibisz Jan 22, 2026
17ba17c
Feature/firebase lookup (#445)
rugeli Jan 29, 2026
79e77e8
Only upload simularium file once (#446)
ascibisz Feb 4, 2026
653285e
Maint/firebase collection cleanup (#448)
rugeli Feb 9, 2026
84d13c4
handle both recipe_path and json body requests (#449)
rugeli Feb 23, 2026
5770826
change error message body
ascibisz Mar 11, 2026
162ef12
lint fixes
ascibisz Mar 11, 2026
64c60c8
add more checks when attempting to read json body
ascibisz Mar 11, 2026
c1d5718
Merge branch 'feature/server-passed-recipe-json' of https://github.co…
rugeli Mar 18, 2026
3baff49
add upload script
ascibisz Oct 22, 2025
558b753
add example data and more documentation
ascibisz Oct 22, 2025
57468b0
point to correct collection
ascibisz Oct 22, 2025
4addc65
have server accept recipe as json object in body of request
ascibisz Oct 22, 2025
8ccacc3
update documentation
ascibisz Oct 22, 2025
497747f
remove accidential dockerfile changes
ascibisz Oct 22, 2025
a136b57
rename param json_recipe
ascibisz Oct 23, 2025
fdea7f1
remove file that shouldn't be in this PR
ascibisz Jan 9, 2026
d1686ee
remove accidential file
ascibisz Jan 9, 2026
3934ebe
lint fixes
ascibisz Jan 9, 2026
5fba3b1
refactor to try to improve clarity of json recipe vs file path
ascibisz Jan 21, 2026
8f49b0e
lint fixes
ascibisz Jan 21, 2026
728c19d
lint fix
ascibisz Jan 21, 2026
8f0c468
minimize changeset
ascibisz Jan 21, 2026
3fd95a4
minimize changeset
ascibisz Jan 21, 2026
2784825
simplify changeset
ascibisz Jan 21, 2026
e140122
code cleanup
ascibisz Jan 22, 2026
cf3b9ed
minimize changeset
ascibisz Jan 22, 2026
071aaf9
remove trailing comma
ascibisz Jan 22, 2026
fa148cc
Feature/firebase lookup (#445)
rugeli Jan 29, 2026
ac34219
Only upload simularium file once (#446)
ascibisz Feb 4, 2026
86b3104
Maint/firebase collection cleanup (#448)
rugeli Feb 9, 2026
45a10ab
handle both recipe_path and json body requests (#449)
rugeli Feb 23, 2026
801b86f
change error message body
ascibisz Mar 11, 2026
7a42705
lint fixes
ascibisz Mar 11, 2026
c2259af
add more checks when attempting to read json body
ascibisz Mar 11, 2026
d3c9c33
Merge branch 'feature/server-passed-recipe-json' of https://github.co…
rugeli Mar 25, 2026
e86069e
let recipe loader check the input and key stripping
rugeli Mar 25, 2026
bcdc065
Update cellpack/autopack/writers/__init__.py
rugeli Mar 30, 2026
e667517
use `isinstance` for AWSHandler, and misc
rugeli Mar 30, 2026
f9570ff
update aws tests
rugeli Mar 30, 2026
b2db8ec
initialize dedup_hash
rugeli Mar 30, 2026
4170c1e
Merge branch 'feature/server-passed-recipe-json' of https://github.co…
rugeli Mar 30, 2026
a8ab9ad
add in-line comment
rugeli Mar 30, 2026
fd12619
temp solution: use requirement.txt
rugeli Mar 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions .github/workflows/cleanup-firebase.yml
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We realized the results collection in firebase is not necessary. For cellpack studio purposes, it is redundant with job_status and we only look to job_status. For locally run packings, we can skip uploading the result path to firebase at all and just directly open the simularium file. This allowed us to remove the cleanup code for the results collection (which was the only firebase cleanup we were doing in this repo anyways)

This file was deleted.

159 changes: 44 additions & 115 deletions cellpack/autopack/DBRecipeHandler.py
Copy link
Copy Markdown
Collaborator Author

@ascibisz ascibisz Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Major changes here:

  1. We realized the results collection in firebase is not necessary. For cellpack studio purposes, it is redundant with job_status and we only look to job_status. For locally run packings, we can skip uploading the result path to firebase at all and just directly open the simularium file. This allowed us to remove the whole ResultDoc class, the DBMaintenance class, and remove upload_result_metadata
  2. We're now using dedup_hash instead of job_id as the id for server packings
  3. Refactored upload_job_status to handle the functionality of update_outputs_directory since they were basically the same, and removed update_outputs_directory

Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import copy
import logging
import shutil
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path

Expand All @@ -10,7 +9,6 @@

import hashlib
import json
import requests

from cellpack.autopack.utils import deep_merge

Expand Down Expand Up @@ -321,36 +319,6 @@ def __init__(self, settings):
self.settings = settings


class ResultDoc:
def __init__(self, db):
self.db = db

def handle_expired_results(self):
"""
Check if the results in the database are expired and delete them if the linked object expired.
"""
current_utc = datetime.now(timezone.utc)
results = self.db.get_all_docs("results")
if results:
for result in results:
result_data = self.db.doc_to_dict(result)
result_age = current_utc - result_data["timestamp"]
if result_age.days > 180 and not self.validate_existence(
result_data["url"]
):
self.db.delete_doc("results", self.db.doc_id(result))
logging.info("Results cleanup complete.")
else:
logging.info("No results found in the database.")

def validate_existence(self, url):
"""
Validate the existence of an S3 object by checking if the URL is accessible.
Returns True if the URL is accessible.
"""
return requests.head(url).status_code == requests.codes.ok


class DBUploader(object):
"""
Handles the uploading of data to the database.
Expand Down Expand Up @@ -529,42 +497,34 @@ def upload_config(self, config_data, source_path):
self.db.update_doc("configs", id, config_data)
return id

def upload_result_metadata(self, file_name, url, job_id=None):
"""
Upload the metadata of the result file to the database.
"""
if self.db:
username = self.db.get_username()
timestamp = self.db.create_timestamp()
self.db.update_or_create(
"results",
file_name,
{
"user": username,
"timestamp": timestamp,
"url": url,
"batch_job_id": job_id,
},
)
if job_id:
self.upload_job_status(job_id, "DONE", result_path=url)

def upload_job_status(self, job_id, status, result_path=None, error_message=None):
def upload_job_status(
self,
dedup_hash,
status,
result_path=None,
error_message=None,
outputs_directory=None,
):
"""
Update status for a given job ID
Update status for a given dedup_hash
"""
if self.db:
timestamp = self.db.create_timestamp()
self.db.update_or_create(
"job_status",
job_id,
{
"timestamp": timestamp,
"status": str(status),
"result_path": result_path,
"error_message": error_message,
},
)
db_handler = self.db
# If db is AWSHandler, switch to firebase handler for job status updates
if hasattr(self.db, "s3_client"):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to directly check if this is an AWSHandler using isinstance? Idk if it is possible for it to not have the s3_client attribute but just in case.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to your question above: no, self.db is not overwritten. In this case, db_handler is reassigned to a local variable that points to the Firebase handler, self.db remains as s3_client.

And I agree, isinstance would be more explicit and robust here, thanks for calling that out!

handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE)
db_handler = handler(default_db="staging")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this overwrite the self.db attribute? Might be better to create a new handler object or use .copy() just in case

timestamp = db_handler.create_timestamp()
data = {
"timestamp": timestamp,
"status": str(status),
"error_message": error_message,
}
if result_path:
data["result_path"] = result_path
if outputs_directory:
data["outputs_directory"] = outputs_directory
db_handler.update_or_create("job_status", dedup_hash, data)

def save_recipe_and_config_to_output(self, output_folder, config_data, recipe_data):
output_path = Path(output_folder)
Expand All @@ -583,15 +543,15 @@ def upload_packing_results_workflow(
self,
source_folder,
recipe_name,
job_id,
dedup_hash,
config_data,
recipe_data,
):
"""
Complete packing results upload workflow including folder preparation and s3 upload
"""
try:
if job_id:
if dedup_hash:

source_path = Path(source_folder)
if not source_path.exists():
Expand All @@ -601,7 +561,7 @@ def upload_packing_results_workflow(

# prepare unique S3 upload folder
parent_folder = source_path.parent
unique_folder_name = f"{source_path.name}_run_{job_id}"
unique_folder_name = f"{source_path.name}_run_{dedup_hash}"
s3_upload_folder = parent_folder / unique_folder_name

logging.debug(f"outputs will be copied to: {s3_upload_folder}")
Expand All @@ -618,7 +578,7 @@ def upload_packing_results_workflow(
upload_result = self.upload_outputs_to_s3(
output_folder=s3_upload_folder,
recipe_name=recipe_name,
job_id=job_id,
dedup_hash=dedup_hash,
)

# clean up temporary folder after upload
Expand All @@ -628,9 +588,12 @@ def upload_packing_results_workflow(
f"Cleaned up temporary upload folder: {s3_upload_folder}"
)

# update outputs directory in firebase
self.update_outputs_directory(
job_id, upload_result.get("outputs_directory")
# update outputs directory in job status
self.upload_job_status(
dedup_hash,
"DONE",
result_path=upload_result.get("simularium_url"),
outputs_directory=upload_result.get("outputs_directory"),
)

return upload_result
Expand All @@ -639,15 +602,15 @@ def upload_packing_results_workflow(
logging.error(e)
return {"success": False, "error": e}

def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):
def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash):
"""
Upload packing outputs to S3 bucket
"""

bucket_name = self.db.bucket_name
region_name = self.db.region_name
sub_folder_name = self.db.sub_folder_name
s3_prefix = f"{sub_folder_name}/{recipe_name}/{job_id}"
s3_prefix = f"{sub_folder_name}/{recipe_name}/{dedup_hash}"

try:
upload_result = self.db.upload_directory(
Expand All @@ -661,8 +624,11 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):
f"{base_url}/{file_info['s3_key']}"
for file_info in upload_result["uploaded_files"]
]
simularium_url = None
for url in public_urls:
if url.endswith(".simularium"):
simularium_url = url
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We had previously uploaded the result .simularium file twice for server run packings! To avoid that, now we're finding it in the uploaded outputs directory and keeping track of its path to specifically reference in the job_status entry

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

outputs_directory = f"https://us-west-2.console.aws.amazon.com/s3/buckets/{bucket_name}/{s3_prefix}/"

logging.info(
f"Successfully uploaded {upload_result['total_files']} files to {outputs_directory}"
)
Expand All @@ -671,7 +637,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):

return {
"success": True,
"run_id": job_id,
"dedup_hash": dedup_hash,
"s3_bucket": bucket_name,
"s3_prefix": s3_prefix,
"public_url_base": f"{base_url}/{s3_prefix}/",
Expand All @@ -680,30 +646,12 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id):
"total_size": upload_result["total_size"],
"urls": public_urls,
"outputs_directory": outputs_directory,
"simularium_url": simularium_url,
}
except Exception as e:
logging.error(e)
return {"success": False, "error": e}

def update_outputs_directory(self, job_id, outputs_directory):
if not self.db or self.db.s3_client:
# switch to firebase handler to update job status
handler = DATABASE_IDS.handlers().get("firebase")
initialized_db = handler(default_db="staging")
if job_id:
timestamp = initialized_db.create_timestamp()
initialized_db.update_or_create(
"job_status",
job_id,
{
"timestamp": timestamp,
"outputs_directory": outputs_directory,
},
)
logging.debug(
f"Updated outputs s3 location {outputs_directory} for job ID: {job_id}"
)


class DBRecipeLoader(object):
"""
Expand Down Expand Up @@ -890,23 +838,4 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict):
return recipe_data


class DBMaintenance(object):
"""
Handles the maintenance of the database.
"""

def __init__(self, db_handler):
self.db = db_handler
self.result_doc = ResultDoc(self.db)

def cleanup_results(self):
"""
Check if the results in the database are expired and delete them if the linked object expired.
"""
self.result_doc.handle_expired_results()

def readme_url(self):
"""
Return the URL to the README file for the database setup section.
"""
return "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases"
DB_SETUP_README_URL = "https://github.com/mesoscope/cellpack?tab=readme-ov-file#introduction-to-remote-databases"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this move to the top of the file?

1 change: 0 additions & 1 deletion cellpack/autopack/interface_objects/default_values.py
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We realized the results collection in firebase is not necessary. For cellpack studio purposes, it is redundant with job_status and we only look to job_status. For locally run packings, we can skip uploading the result path to firebase at all and just directly open the simularium file.

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"objects",
"gradients",
"recipes",
"results",
"configs",
"recipes_edited",
]
30 changes: 26 additions & 4 deletions cellpack/autopack/loaders/recipe_loader.py
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RecipeLoader needs to now accept a dictionary representing a JSON recipe, while maintaining the previous functionality for accepting an input_file_path. Fortunately didn't take much to make this work! But a few things of note

  1. I didn't know how best to change / document that RecipeLoader can be initialized with either an input_file_path OR a json_recipe, not both, but you need to have one. To try to clarify this, I made the from_json class method as the avenue to initialize a RecipeLoader with a JSON recipe, but I'm not sure if that's more clear or just more confusing. Open to feedback / suggestions on this
  2. to make _read work for json_recipes, we just had to add some default values and skip the part where we do the initial recipe read if we have a json_recipe loaded in

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One suggestion to handle multiple input streams:

  1. Update the RecipeLoader init to accept a single input_data argument which can be a path to a file or a dictionary. This should avoid potential bugs from conflicting inputs passed through input_file_path and json_recipe since there is only one input now.
  2. Handle resolving the path or dict inside the read() method. This might require getting rid of the file path and file extension attributes which are not really used I think

Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,21 @@ class RecipeLoader(object):
# TODO: add all default values here
default_values = default_recipe_values.copy()

def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False):
def __init__(
self,
input_file_path,
save_converted_recipe=False,
use_docker=False,
json_recipe=None,
):
_, file_extension = os.path.splitext(input_file_path)
self.current_version = CURRENT_VERSION
self.file_path = input_file_path
self.file_extension = file_extension
self.ingredient_list = []
self.compartment_list = []
self.save_converted_recipe = save_converted_recipe
self.json_recipe = json_recipe

# set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes
if autopack.is_remote_path(self.file_path):
Expand All @@ -49,6 +56,15 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals

self.recipe_data = self._read(use_docker=use_docker)

@classmethod
def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False):
return cls(
input_file_path="",
save_converted_recipe=save_converted_recipe,
use_docker=use_docker,
json_recipe=json_recipe,
)

@staticmethod
def _resolve_object(key, objects):
current_object = objects[key]
Expand Down Expand Up @@ -168,9 +184,15 @@ def _migrate_version(self, old_recipe):
)

def _read(self, resolve_inheritance=True, use_docker=False):
new_values, database_name, is_unnested_firebase = autopack.load_file(
self.file_path, cache="recipes", use_docker=use_docker
)
database_name = None
is_unnested_firebase = False
new_values = self.json_recipe
if new_values is None:
# Read recipe from filepath
new_values, database_name, is_unnested_firebase = autopack.load_file(
self.file_path, cache="recipes", use_docker=use_docker
)

if database_name == "firebase":
if is_unnested_firebase:
objects = new_values.get("objects", {})
Expand Down
Loading
Loading