nmdp-bioinformatics
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 1 addition & 0 deletions b/‎Makefile‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎grim/conf/README.md‎ renamed to ‎conf/README.md‎
Lines changed: 2 additions & 2 deletions b/‎grim/conf/README.md‎ renamed to ‎conf/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎grim/conf/minimal-configuration.json‎ renamed to ‎conf/minimal-configuration.json‎
Lines changed: 6 additions & 4 deletions b/‎grim/conf/minimal-configuration.json‎ renamed to ‎conf/minimal-configuration.json‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎data/freqs/CAU.freqs.gz‎
17.2 KB b/‎data/freqs/CAU.freqs.gz‎
17.2 KB
diff --git a/‎data/subjects/donor.csv‎
Lines changed: 1 addition & 0 deletions b/‎data/subjects/donor.csv‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎grim/imputation/graph_generation/LICENSE_INFO.rst‎ renamed to ‎graph_generation/LICENSE_INFO.rst‎ b/‎grim/imputation/graph_generation/LICENSE_INFO.rst‎ renamed to ‎graph_generation/LICENSE_INFO.rst‎
diff --git a/‎grim/imputation/graph_generation/Makefile‎ renamed to ‎graph_generation/Makefile‎ b/‎grim/imputation/graph_generation/Makefile‎ renamed to ‎graph_generation/Makefile‎
diff --git a/‎grim/imputation/graph_generation/README.bug‎ renamed to ‎graph_generation/README.bug‎
Lines changed: 0 additions & 1 deletion b/‎grim/imputation/graph_generation/README.bug‎ renamed to ‎graph_generation/README.bug‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎grim/imputation/graph_generation/README.md‎ renamed to ‎graph_generation/README.md‎
Lines changed: 10 additions & 11 deletions b/‎grim/imputation/graph_generation/README.md‎ renamed to ‎graph_generation/README.md‎
Lines changed: 10 additions & 11 deletions
@@ -116,6 +116,7 @@ venv.bak/
 .spyproject
 .idea
 .vscode
+*.swp
 
 # Rope project settings
 .ropeproject
@@ -137,3 +138,5 @@ allure_report/
 
 # cython temp files
 grim/**/*.c
+
+output/
@@ -90,6 +90,7 @@ docker: docker-build ## build a docker image and run the service
 
 install: clean ## install the package to the active Python's site-packages
 	pip install --upgrade pip
+	python3 setup.py build_ext --inplace
 	python setup.py install
 	pip install -r requirements.txt
 	pip install -r requirements-tests.txt
 
@@ -4,8 +4,8 @@
 | --- | --- |
 | populations | The population to consider them frequencies. |
 | priority | The coefficient values that define the priority matrix. |
-| loci_map| Loci full name Mapping for indexes. | 
-| freq_trim_threshold | The numerator in the frequency threshold. | 
+| loci_map| Loci full name Mapping for indexes. |
+| freq_trim_threshold | The numerator in the frequency threshold. |
 | factor_missing_data | factor to haplotype frequency in plan B in missing data case |
 | Plan_B_Matrix | matrix arranged by the most probable possibilities for recombination. The first element in the matrix should be the full haplotype. the indexes are corresponding to loci_map|
 | planb| True - use plan B anc C. False - use only Plan A. |
 
@@ -1,7 +1,6 @@
 {
   "populations": [
-    "FILII",
-      "NAMER"
+    "CAU"
   ],
   "freq_trim_threshold": 1e-5,
  "priority": {
@@ -37,12 +36,15 @@
   "number_of_pop_results": 100,
   "output_MUUG": true,
   "output_haplotypes": true,
-  "graph_files_path": "output/csv" ,
+  "freq_data_dir": "data/freqs" ,
+  "pops_count_file": "graph_generation/output/pop_ratio.txt" ,
+  "freq_file": "graph_generation/output/hpf.csv" ,
+  "graph_files_path": "graph_generation/output/csv/" ,
   "node_csv_file": "nodes.csv",
   "edges_csv_file": "edges.csv",
   "info_node_csv_file": "info_node.csv",
   "top_links_csv_file": "top_links.csv",
-  "imputation_in_file": "validation/simulation/data/simulated_donor.csv",
+  "imputation_in_file": "data/subjects/donor.csv",
   "imputation_out_umug_freq_filename": "don.umug",
   "imputation_out_umug_pops_filename": "don.umug.pops",
   "imputation_out_hap_freq_filename": "don.pmug",
 
@@ -0,0 +1 @@
+D1,A*01:02+A*02:01/A*03:01^B*15:01+B*15:01,CAU,CAU
@@ -21,4 +21,3 @@ $ cut -f1,2 -d',' output/csv/nemo/edges.csv |sort |uniq -c |sort -rn |more
  539 117913,117365
  539 117884,117365
  515 117918,117370
-
@@ -13,25 +13,25 @@
 		```
 
 - Python 3
-	- On MacOS install with 
+	- On MacOS install with
 		```
 		brew install python3
 		```
 
 - Install Neo4J
-	- On MacOS install with 
+	- On MacOS install with
 		```
 		brew install neo4j
 		```
 
 	- Setup NEO4J_HOME
-    
+
         Point NEO4J_HOME to the root of the NEO4J directory.
 		```
 		export NEO4J_HOME=/usr/local/Cellar/neo4j/3.2.2/libexec
 		```
 
-### Linux 
+### Linux
 - JDK 8
 	- Install JDK 1.8 from Oracle
 	- add JAVA_HOME to ~/.bash_profile
@@ -51,11 +51,11 @@
         ```
 
 	- Point NEO4J_HOME to the root of the uncompressed NEO4J directory and add the following line to ~/.bash_profile
-    
+
 		```
 		export NEO4J_HOME=path/to/neo4j-community-3.5.7
 		```
-        
+
 
 
 # Using Makefile
@@ -99,12 +99,12 @@ make nemo
 
 To use a different set of frequencies use the following procedure:
 
-- Starting in the graph generator directory, convert the data from frequency format to hpf (haplotype, population, frequency).  
-``` 
-   python nemo_to_hpf_csv.py 
+- Starting in the graph generator directory, convert the data from frequency format to hpf (haplotype, population, frequency).
+```
+   python nemo_to_hpf_csv.py
 ```
 
-- This program looks for a data/NEMO2011 directory and reads the individual frequency files and generates this csv: 
+- This program looks for a data/NEMO2011 directory and reads the individual frequency files and generates this csv:
 ```
     output/hpf.csv
 ```
@@ -120,4 +120,3 @@ To use a different set of frequencies use the following procedure:
 	└── top_links.csv
 ```
     Note: there is an option to trim the frequency set below a frequency threshold.  If the trimming threshold is 1e-6 it will take 9m35s to generate the graph csv files on a mid-2015 MacBook Pro (2.5 GHz Intel Core i7) and will result in 1,088,817 nodes (159MB), 14,868,976 edges (2.0GB)and 5,947,591 top links (108MB).
-
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+D1,A*01:02+A*02:01/A*03:01^B*15:01+B*15:01,CAU,CAU`