|
5 | 5 | from dimspy.tools import replicate_filter
|
6 | 6 | from dimspy.tools import create_sample_list
|
7 | 7 | from dimspy.tools import align_samples
|
| 8 | +from dimspy.tools import hdf5_peak_matrix_to_txt |
8 | 9 | from dimspy.tools import blank_filter
|
9 | 10 | from dimspy.tools import sample_filter
|
10 | 11 | from dimspy.portals.hdf5_portal import save_peaklists_as_hdf5, save_peak_matrix_as_hdf5
|
|
13 | 14 |
|
14 | 15 | def main():
|
15 | 16 |
|
16 |
| - # Example 1 - mzML files (zip file) |
17 |
| - source = os.path.join("Y:\users\zhangcy\polar_positive_data") |
18 |
| - fn_filelist = os.path.join("Y:\users\zhangcy\polar_positive_data\pos_filelist.txt") |
19 |
| - output = os.path.join("Y:\\users\\zhangcy\\polar_positive_data_results\\") |
20 |
| - |
21 |
| - source = os.path.join("Y:\users\zhangcy\polar_positive_data") |
22 |
| - fn_filelist = os.path.join("E:\\raw_109\pos_filelist.txt") |
23 |
| - output = os.path.join("E:\\raw_109") |
| 17 | + source = os.path.join("..", "tests", "data", "MTBLS79_subset", "MTBLS79_mzml_triplicates.zip") |
| 18 | + fn_filelist = os.path.join("..", "tests", "data", "MTBLS79_subset", "filelist_mzml_triplicates.txt") |
| 19 | + output = os.path.join("..", "tests", "test_results") |
24 | 20 |
|
25 | 21 | print "Process Scans....."
|
26 |
| - pls = process_scans(source, min_scans=1, function_noise="noise_packets", |
| 22 | + pls = process_scans(source, min_scans=1, function_noise="median", |
27 | 23 | snr_thres=3.0, ppm=2.0, min_fraction=None, rsd_thres=None,
|
28 |
| - filelist=fn_filelist, remove_mz_range=[], filter_scan_events={"exclude":[[50.0, 620.0, "full"]]}, block_size=2000, ncpus=None) |
| 24 | + filelist=fn_filelist, remove_mz_range=[], block_size=5000, ncpus=None) |
29 | 25 | print "Finished"
|
30 |
| - |
31 |
| - sample_list = os.path.join("E:\\raw_109\\sample_list.txt") |
32 |
| - create_sample_list(pls, sample_list, delimiter="\t") |
33 |
| - |
34 | 26 | print
|
35 |
| - print pls |
36 |
| - for pl in pls: |
37 |
| - print pl.ID, pl.shape |
38 |
| - with open(os.path.join(output, pl.ID + ".txt"), "w") as out: out.write(pl.to_str("\t")) |
39 | 27 |
|
40 |
| - """ |
41 |
| - save_peaklists_as_hdf5(pls, os.path.join(output, "pls.h5")) |
42 |
| - pls = load_peaklists_from_hdf5(os.path.join(output, "pls.h5")) |
43 |
| - """ |
44 |
| - |
45 |
| - print |
46 | 28 | print "Replicate Filter....."
|
47 |
| - logfile = os.path.join("E:\\raw_109\\log_replicate_filter.txt") |
48 |
| - pls_rf = replicate_filter(pls, ppm=2.0, replicates=3, min_peaks=2, rsd_thres=None, quality_logfile=logfile) |
| 29 | + logfile = os.path.join(output, "log_replicate_filter.txt") |
| 30 | + pls_rf = replicate_filter(pls, ppm=2.0, replicates=3, min_peaks=2, rsd_thres=None, report=logfile, block_size=5000) |
49 | 31 | print "Finished"
|
50 | 32 | print
|
51 | 33 |
|
52 |
| - sample_list = os.path.join("E:\\raw_109\\sample_list.txt") |
| 34 | + print "Create a new sample list" |
| 35 | + sample_list = os.path.join(output, "sample_list.txt") |
53 | 36 | create_sample_list(pls_rf, sample_list, delimiter="\t")
|
54 |
| - """ |
55 |
| - save_peaklists_as_hdf5(pls, os.path.join(output, "pls_rf.h5")) |
| 37 | + print "Finished" |
| 38 | + print |
56 | 39 |
|
57 | 40 | print "Align Samples...."
|
58 |
| - pm = align_samples(pls_rf, ppm=3.0) |
| 41 | + pm = align_samples(pls_rf, ppm=3.0, ncpus=1, block_size=5000) |
59 | 42 | print "Finished", pm.shape
|
60 | 43 | print
|
61 | 44 |
|
62 | 45 | save_peak_matrix_as_hdf5(pm, os.path.join(output, "pm.h5"))
|
| 46 | + hdf5_peak_matrix_to_txt(os.path.join(output, "pm.h5"), path_out=os.path.join(output, "pm.txt"), attr_name="intensity", comprehensive=True) |
| 47 | + |
| 48 | + pm = load_peak_matrix_from_hdf5(os.path.join(output, "pm.h5")) |
63 | 49 |
|
64 | 50 | print "Blank Filter"
|
65 | 51 | pm_bf = blank_filter(pm, "blank", min_fraction=1.0, min_fold_change=10.0, function="mean", rm_samples=True)
|
66 | 52 | print "Finished", pm_bf.shape
|
67 | 53 | print
|
68 | 54 |
|
69 |
| - save_peak_matrix_as_hdf5(pm_bf, os.path.join(output, "pm_bf.h5")) |
70 |
| -
|
71 | 55 | print "Sample Filter"
|
72 | 56 | pm_bf_sf = sample_filter(pm, 0.8, within=False)
|
73 | 57 | print "Finished", pm_bf_sf.shape
|
74 | 58 | print
|
75 | 59 |
|
76 |
| - save_peak_matrix_as_hdf5(pm_bf_sf, os.path.join(output, "pm_bf_sf.h5")) |
77 |
| - """ |
78 |
| - |
79 | 60 | if __name__ == '__main__':
|
80 | 61 | main()
|
0 commit comments