|
24 | 24 | create_split_files, make_combinations_double_all_diverse, |
25 | 25 | make_combinations_triple_all_diverse, make_combinations_quadruple_all_diverse, |
26 | 26 | make_ssm_singles |
27 | | -) # not yet implemented: make_combinations_double_all_diverse_and_all_positions |
28 | | - |
| 27 | +) |
| 28 | +from pypef.utils.split import DatasetSplitter |
29 | 29 | from pypef.utils.directed_evolution import DirectedEvolution |
30 | 30 | from pypef.utils.sto2a2m import convert_sto2a2m |
31 | 31 |
|
|
34 | 34 |
|
35 | 35 |
|
36 | 36 | def run_pypef_utils(arguments): |
37 | | - if arguments['mklsts']: |
| 37 | + if arguments['mklsts'] or ['mklsts_rnd'] or ['mklsts_mod'] or ['mklsts_cont'] or ['mklsts_plot']: |
38 | 38 | wt_sequence = get_wt_sequence(arguments['--wt']) |
39 | 39 | t_drop = float(arguments['--drop']) |
40 | 40 | ls_proportion = arguments['--ls_proportion'] |
41 | | - |
42 | 41 | logger.info(f'Length of provided sequence: {len(wt_sequence)} amino acids.') |
43 | 42 | logger.info(f'Training set proportion (--ls_proportion): {ls_proportion}.') |
44 | 43 | df = drop_rows(arguments['--input'], amino_acids, t_drop, |
45 | 44 | arguments['--sep'], arguments['--mutation_sep']) |
46 | 45 | no_rnd = arguments['--numrnd'] |
47 | | - |
48 | 46 | single_variants, single_values, higher_variants, higher_values = get_variants( |
49 | 47 | df, amino_acids, wt_sequence, arguments['--mutation_sep'] |
50 | 48 | ) |
51 | 49 | if len(single_variants) == 0: |
52 | 50 | logger.info('Found no single substitution variants for possible recombination!') |
53 | | - sub_ls, val_ls, sub_ts, val_ts = make_sub_ls_ts( |
54 | | - single_variants, single_values, |
55 | | - higher_variants, higher_values, |
56 | | - ls_proportion |
57 | | - ) |
58 | | - logger.info('Tip: You can edit your LS and TS datasets just by ' |
59 | | - 'cutting/pasting between the LS and TS fasta datasets.') |
60 | 51 |
|
61 | | - make_fasta_ls_ts('LS.fasl', wt_sequence, sub_ls, val_ls) |
62 | | - make_fasta_ls_ts('TS.fasl', wt_sequence, sub_ts, val_ts) |
| 52 | + if arguments['mklsts']: |
| 53 | + sub_ls, val_ls, sub_ts, val_ts = make_sub_ls_ts( |
| 54 | + single_variants, single_values, |
| 55 | + higher_variants, higher_values, |
| 56 | + ls_proportion |
| 57 | + ) |
| 58 | + logger.info('Tip: You can edit your LS and TS datasets just by ' |
| 59 | + 'cutting/pasting between the LS and TS fasta datasets.') |
63 | 60 |
|
64 | | - try: |
65 | | - no_rnd = int(no_rnd) |
66 | | - except ValueError: |
67 | | - no_rnd = 0 |
68 | | - if no_rnd != 0: |
69 | | - random_set_counter = 1 |
70 | | - no_rnd = int(no_rnd) |
71 | | - while random_set_counter <= no_rnd: |
72 | | - sub_ls, val_ls, sub_ts, val_ts = make_sub_ls_ts_randomly( |
73 | | - single_variants, single_values, |
74 | | - higher_variants, higher_values, |
75 | | - ls_proportion |
76 | | - ) |
77 | | - make_fasta_ls_ts('LS_random_' + str(random_set_counter) + '.fasl', wt_sequence, sub_ls, val_ls) |
78 | | - make_fasta_ls_ts('TS_random_' + str(random_set_counter) + '.fasl', wt_sequence, sub_ts, val_ts) |
79 | | - random_set_counter += 1 |
| 61 | + make_fasta_ls_ts('LS.fasl', wt_sequence, sub_ls, val_ls) |
| 62 | + make_fasta_ls_ts('TS.fasl', wt_sequence, sub_ts, val_ts) |
| 63 | + |
| 64 | + try: |
| 65 | + no_rnd = int(no_rnd) |
| 66 | + except ValueError: |
| 67 | + no_rnd = 0 |
| 68 | + if no_rnd != 0: |
| 69 | + random_set_counter = 1 |
| 70 | + no_rnd = int(no_rnd) |
| 71 | + while random_set_counter <= no_rnd: |
| 72 | + sub_ls, val_ls, sub_ts, val_ts = make_sub_ls_ts_randomly( |
| 73 | + single_variants, single_values, |
| 74 | + higher_variants, higher_values, |
| 75 | + ls_proportion |
| 76 | + ) |
| 77 | + make_fasta_ls_ts('LS_random_' + str(random_set_counter) + '.fasl', wt_sequence, sub_ls, val_ls) |
| 78 | + make_fasta_ls_ts('TS_random_' + str(random_set_counter) + '.fasl', wt_sequence, sub_ts, val_ts) |
| 79 | + random_set_counter += 1 |
| 80 | + else: |
| 81 | + ds = DatasetSplitter(df) |
| 82 | + if arguments['mklsts_rnd']: |
| 83 | + pass # TODO |
| 84 | + |
| 85 | + elif arguments['mklsts_mod']: |
| 86 | + pass # TODO |
| 87 | + |
| 88 | + elif arguments['mklsts_cont']: |
| 89 | + pass # TODO |
| 90 | + |
| 91 | + elif arguments['mklsts_plot']: |
| 92 | + pass # TODO |
80 | 93 |
|
81 | 94 | elif arguments['mkps']: |
82 | 95 | wt_sequence = get_wt_sequence(arguments['--wt']) |
|
0 commit comments