import os
import warnings
from argparse import ArgumentParser
from pathlib import Path

import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from tqdm import tqdm

from viscy.data.triplet import TripletDataModule

# filterwarnings treats `message` as a regex matched against the start of
# the warning text, so a short, paren-free prefix is used here; the full
# "To copy construct from a tensor..." message contains regex
# metacharacters that would break a literal match.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="To copy construct from a tensor",
)

# %% Paths and constants
save_dir = (
    "/hpc/mydata/alishba.imran/VisCy/applications/contrastive_phenotyping/embeddings4"
)

# rechunked data
data_path = "/hpc/projects/intracellular_dashboard/viral-sensor/2024_02_04_A549_DENV_ZIKV_timelapse/2.2-register_annotations/updated_all_annotations.zarr"

# updated tracking data
tracks_path = "/hpc/projects/intracellular_dashboard/viral-sensor/2024_02_04_A549_DENV_ZIKV_timelapse/7.1-seg_track/tracking_v1.zarr"

source_channel = ["background_mask", "uninfected_mask", "infected_mask"]
z_range = (0, 1)
# batch_size should match the number of FOVs being processed so that no
# data is dropped: 15 for the full dataset, 12 for infected only, and 8
# for uninfected only.
batch_size = 1

# non-rechunked data
data_path_1 = "/hpc/projects/intracellular_dashboard/viral-sensor/2024_02_04_A549_DENV_ZIKV_timelapse/7.1-seg_track/tracking_v1.zarr"

# updated tracking data
tracks_path_1 = "/hpc/projects/intracellular_dashboard/viral-sensor/2024_02_04_A549_DENV_ZIKV_timelapse/7.1-seg_track/tracking_v1.zarr"

source_channel_1 = ["Nuclei_prediction_labels"]
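
# Optional fail-fast check: these HPC paths are assumed to be mounted on
# the node running this script; failing here is clearer than an error
# deep inside the dataloaders. save_dir is created later if missing.
for _p in (data_path, tracks_path, data_path_1, tracks_path_1):
    if not Path(_p).exists():
        raise FileNotFoundError(f"Expected path not found: {_p}")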


# %% Define the main function for computing per-cell mask averages
def main(hparams):
    # Initialize the data module for prediction; embeddings are re-done
    # with 224x224 patches
    data_module = TripletDataModule(
        data_path=data_path,
        tracks_path=tracks_path,
        source_channel=source_channel,
        z_range=z_range,
        initial_yx_patch_size=(224, 224),
        final_yx_patch_size=(224, 224),
        batch_size=batch_size,
        num_workers=hparams.num_workers,
    )

    data_module.setup(stage="predict")

    print(f"Total prediction dataset size: {len(data_module.predict_dataset)}")

    dataloader = DataLoader(
        data_module.predict_dataset,
        batch_size=batch_size,
        num_workers=hparams.num_workers,
    )

    # Initialize the second data module for the segmentation masks
    seg_data_module = TripletDataModule(
        data_path=data_path_1,
        tracks_path=tracks_path_1,
        source_channel=source_channel_1,
        z_range=z_range,
        initial_yx_patch_size=(224, 224),
        final_yx_patch_size=(224, 224),
        batch_size=batch_size,
        num_workers=hparams.num_workers,
    )

    seg_data_module.setup(stage="predict")

    seg_dataloader = DataLoader(
        seg_data_module.predict_dataset,
        batch_size=batch_size,
        num_workers=hparams.num_workers,
    )
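
    # The loop below zips the two dataloaders, which assumes both datasets
    # enumerate the same cells in the same order; zip would otherwise
    # truncate silently, so check the lengths up front.
    assert len(data_module.predict_dataset) == len(
        seg_data_module.predict_dataset
    ), "Image and segmentation datasets must be index-aligned"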

    # Initialize lists to store average values
    background_avg = []
    uninfected_avg = []
    infected_avg = []

    for batch, seg_batch in tqdm(
        zip(dataloader, seg_dataloader),
        desc="Processing batches",
        total=len(dataloader),  # number of batches (equals cells at batch_size=1)
    ):
        anchor = batch["anchor"]
        seg_anchor = seg_batch["anchor"].int()

        # Extract the fov_name and id from the batch
        fov_name = batch["index"]["fov_name"][0]
        cell_id = batch["index"]["id"].item()

        fov_dirs = fov_name.split("/")
        # Construct the path to the tracks CSV for this FOV
        csv_path = os.path.join(
            tracks_path, *fov_dirs, f"tracks{fov_name.replace('/', '_')}.csv"
        )

        # Read the CSV file
        df = pd.read_csv(csv_path)

        # Find the row with the specified id and extract the track_id
        track_id = df.loc[df["id"] == cell_id, "track_id"].values[0]
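        # The tracks CSV is assumed to contain "id" and "track_id" columns,
        # with "track_id" matching the label values written into the
        # segmentation channel; the lookup above raises IndexError if the
        # cell id is missing from the CSV.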

        # Create a boolean mask where segmentation values equal the track_id
        mask = seg_anchor == track_id

        # Alternative masks explored during development:
        # mask = seg_anchor > 0  # all labeled pixels
        # unique, counts = np.unique(seg_anchor[seg_anchor > 0], return_counts=True)
        # mask = seg_anchor == unique[np.argmax(counts)]  # most frequent label

        # Expand the single-channel mask to match the anchor tensor shape
        mask = mask.expand(1, 3, 1, 224, 224)

        # Calculate the average value per channel (background, uninfected,
        # infected) over the masked pixels
        background_avg.append(anchor[:, 0, :, :, :][mask[:, 0]].mean().item())
        uninfected_avg.append(anchor[:, 1, :, :, :][mask[:, 1]].mean().item())
        infected_avg.append(anchor[:, 2, :, :, :][mask[:, 2]].mean().item())
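
        # Note: if the track label is absent from the segmentation patch,
        # the mask is empty and .mean() returns NaN for that cell; such
        # entries can be filtered downstream with np.isnan if needed.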

    # Convert lists to numpy arrays
    background_avg = np.array(background_avg)
    uninfected_avg = np.array(uninfected_avg)
    infected_avg = np.array(infected_avg)

    print("Average values per cell for each mask calculated.")
    print("Background average shape:", background_avg.shape)
    print("Uninfected average shape:", uninfected_avg.shape)
    print("Infected average shape:", infected_avg.shape)

    # Save the averages as .npy files (create save_dir if it does not exist)
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, "background_avg.npy"), background_avg)
    np.save(os.path.join(save_dir, "uninfected_avg.npy"), uninfected_avg)
    np.save(os.path.join(save_dir, "infected_avg.npy"), infected_avg)
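
    # Each saved array is 1-D with one entry per processed cell, e.g.:
    #   background_avg = np.load(os.path.join(save_dir, "background_avg.npy"))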


if __name__ == "__main__":
    # Only --num_workers is consumed by main(); the remaining flags appear
    # to be leftovers from the training CLI this script was adapted from
    # and are unused here.
    parser = ArgumentParser()
    parser.add_argument("--backbone", type=str, default="resnet50")
    parser.add_argument("--margin", type=float, default=0.5)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--schedule", type=str, default="Constant")
    parser.add_argument("--log_steps_per_epoch", type=int, default=10)
    parser.add_argument("--embedding_len", type=int, default=256)
    parser.add_argument("--max_epochs", type=int, default=100)
    parser.add_argument("--accelerator", type=str, default="gpu")
    parser.add_argument("--devices", type=int, default=1)
    parser.add_argument("--num_nodes", type=int, default=1)
    parser.add_argument("--log_every_n_steps", type=int, default=1)
    parser.add_argument("--num_workers", type=int, default=8)
    args = parser.parse_args()
    main(args)