Fix dataset whitening to apply only when args.whiten is set, import scanpy lazily, and add scanpy requirement

atong01 · atong01 · commit 8eb8831ba524 · 2022-02-08T23:55:21.000-05:00
diff --git a/TrajectoryNet/dataset.py b/TrajectoryNet/dataset.py
@@ -7,7 +7,6 @@
 import numpy as np
 import torch
 import scipy.sparse
-import scanpy as sc
 
 
 from sklearn.preprocessing import StandardScaler
@@ -201,13 +200,14 @@ def load(self, data_file, max_dim):
         self.labels = self.data_dict["sample_labels"]
         if self.embedding_name not in self.data_dict.keys():
             raise ValueError("Unknown embedding name %s" % self.embedding_name)
-        embedding = self.data_dict[self.embedding_name]
-        scaler = StandardScaler()
-        scaler.fit(embedding)
-        self.ncells = embedding.shape[0]
+        self.data = self.data_dict[self.embedding_name]
+        if self.args.whiten:
+            scaler = StandardScaler()
+            scaler.fit(self.data)
+            self.data = scaler.transform(self.data)
+        self.ncells = self.data.shape[0]
         assert self.labels.shape[0] == self.ncells
         # Scale so that embedding is normally distributed
-        self.data = scaler.transform(embedding)
 
         delta_name = "delta_%s" % self.embedding_name
         if delta_name not in self.data_dict.keys():
@@ -217,10 +217,11 @@ def load(self, data_file, max_dim):
             )
             self.use_velocity = False
         else:
-            delta = self.data_dict[delta_name]
-            assert delta.shape[0] == self.ncells
+            self.velocity = self.data_dict[delta_name]
+            assert self.velocity.shape[0] == self.ncells
             # Normalize ignoring mean from embedding
-            self.velocity = delta / scaler.scale_
+            if self.args.whiten:
+                self.velocity = self.velocity / scaler.scale_
 
         if max_dim is not None and self.data.shape[1] > max_dim:
             print("Warning: Clipping dimensionality to %d" % max_dim)
@@ -302,6 +303,8 @@ def load(self):
 
 class CustomAnnDataFromFile(CustomAnnData):
     def __init__(self, name, args):
+        import scanpy as sc
+
         adata = sc.read_h5ad(name)
         super().__init__(adata, args)
 
diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,7 @@ argparse
 matplotlib>=3.2.1
 numpy>=1.18.4
 POT>=0.7.0
+scanpy
 scikit-learn>=0.23.1
 scipy>=1.4.1
 torch>=1.5.0
diff --git a/setup.py b/setup.py
@@ -7,6 +7,7 @@
     "matplotlib>=3.2.1",
     "numpy>=1.18.4",
     "POT>=0.7.0",
+    "scanpy",
     "scikit-learn>=0.23.1",
     "scipy>=1.4.1",
     "torch>=1.5.0",