diff --git a/.travis.yml b/.travis.yml index 82f849d..f67df2f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ install: script: - pytest --cov=./ - + - flake8 after_success: - codecov diff --git a/data/.b3_frm_human_b1_r0_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz b/data/.b3_frm_human_b1_r0_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz index 5fc07c7..3d20b16 100644 Binary files a/data/.b3_frm_human_b1_r0_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz and b/data/.b3_frm_human_b1_r0_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz differ diff --git a/data/.b3_frm_human_b1_r1_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz b/data/.b3_frm_human_b1_r1_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz index fa7d459..7fbb862 100644 Binary files a/data/.b3_frm_human_b1_r1_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz and b/data/.b3_frm_human_b1_r1_400ns_noPBCWhole_noJump_Center_SKIP10.xtc_offsets.npz differ diff --git a/orientation/__pycache__/calc_angles.cpython-37.pyc b/orientation/__pycache__/calc_angles.cpython-37.pyc new file mode 100644 index 0000000..8367877 Binary files /dev/null and b/orientation/__pycache__/calc_angles.cpython-37.pyc differ diff --git a/orientation/__pycache__/setup_system.cpython-37.pyc b/orientation/__pycache__/setup_system.cpython-37.pyc index 3866d58..1e48608 100644 Binary files a/orientation/__pycache__/setup_system.cpython-37.pyc and b/orientation/__pycache__/setup_system.cpython-37.pyc differ diff --git a/orientation/__pycache__/visuals.cpython-37.pyc b/orientation/__pycache__/visuals.cpython-37.pyc index e78a5dd..f6343bb 100644 Binary files a/orientation/__pycache__/visuals.cpython-37.pyc and b/orientation/__pycache__/visuals.cpython-37.pyc differ diff --git a/orientation/calc_angles.py b/orientation/calc_angles.py index 4f54f67..557294b 100644 --- a/orientation/calc_angles.py +++ b/orientation/calc_angles.py @@ -1,5 +1,6 @@ import numpy as np + def get_com(universe, selection): ''' Return the centre of mass of a selection string ''' @@ -13,41 +14,45 @@ def get_principal_axes(universe, selection): # Methodology taken from Beckstein # https://stackoverflow.com/questions/49239475/ - # how-to-use-mdanalysis-to-principal-axes-and-moment-of-inertia-with-a-group-of-at/49268247#49268247 + # how-to-use-mdanalysis-to-principal-axes-and- + # moment-of-inertia-with-a-group-of-at/49268247#49268247 # select alpha carbons only CA = universe.select_atoms(selection) # calculate moment of inertia, and sort eigenvectors - I = CA.moment_of_inertia() + # PCB renamed I to Inertia (ambiguous variable name) + Inertia = CA.moment_of_inertia() # UT = CA.principal_axes() # U = UT.T - values, evecs = np.linalg.eigh(I) + values, evecs = np.linalg.eigh(Inertia) indices = np.argsort(values) U = evecs[:, indices] - ## Below is for testing ## - # Lambda = U.T.dot(I.dot(U)) + # Below is for testing ## + # Lambda = U.T.dot(Inertia.dot(U)) # # print(Lambda) # print(np.allclose(Lambda - np.diag(np.diagonal(Lambda)), 0)) - #print("") - #print(U) + # print("") + # print(U) return U def dir_cosine(v1, v2): - ''' + ''' Given two vectors (v1 and v2) work out the direction cosine between - them. For more information see: https://en.wikipedia.org/wiki/Direction_cosine) + them. For more information see: + https://en.wikipedia.org/wiki/Direction_cosine) ''' - direction_cosine = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)) + direction_cosine = np.dot(v1, v2) / (np.linalg.norm(v1) * + np.linalg.norm(v2)) return direction_cosine @@ -55,8 +60,8 @@ def dir_cosine(v1, v2): def make_direction_cosine_matrix(ref, axes_set): ''' - Given a set of two axes (ref_option and axes_set) work out the direction cosine matrix - between them. + Given a set of two axes (ref_option and axes_set) + work out the direction cosine matrix between them. ''' # Gather the individual vectors of our reference basis ex = ref[0, :] @@ -64,9 +69,9 @@ def make_direction_cosine_matrix(ref, axes_set): ez = ref[2, :] # principal axes calculated at this instance - u = axes_set[0, :] # 1st princpal axis (PA) - v = axes_set[1, :] # 2nd princpal axis (PA) - w = axes_set[2, :] # 3rd princpal axis (PA) + u = axes_set[0, :] # 1st princpal axis (PA) + v = axes_set[1, :] # 2nd princpal axis (PA) + w = axes_set[2, :] # 3rd princpal axis (PA) # Work out the dot prod b/w the 1st PA and the unit vectors u_alpha = dir_cosine(u, ex) @@ -83,9 +88,8 @@ def make_direction_cosine_matrix(ref, axes_set): w_beta = dir_cosine(w, ey) w_gamma = dir_cosine(w, ez) - # make all of the above into a 3 X 3 matrix and return it + # make all of the above into a 3 X 3 matrix and return it c_matrix = np.asarray([[u_alpha, u_beta, u_gamma], [v_alpha, v_beta, v_gamma], - [w_alpha, w_beta, w_gamma]]) - + [w_alpha, w_beta, w_gamma]]) return c_matrix diff --git a/orientation/protein_orientation.py b/orientation/protein_orientation.py index 56a752d..0cdaffd 100644 --- a/orientation/protein_orientation.py +++ b/orientation/protein_orientation.py @@ -1,20 +1,17 @@ -# import modules +# import modules import MDAnalysis as mda import numpy as np import scipy from scipy.spatial.transform import Rotation as R -import sys -import os import argparse # Lets run in parallel from joblib import Parallel, delayed import time -import sys # import functions from own modules from setup_system import get_universe, read_stride -from calc_angles import get_com, make_direction_cosine_matrix, get_principal_axes +from calc_angles import make_direction_cosine_matrix, get_principal_axes from visuals import vis_axes # Print Versions @@ -22,10 +19,11 @@ print("NumPy version: ", np.__version__) print("SciPy version: ", scipy.__version__) + def run_single(universe_files, protein_info, calc_method, vector_sel_resids): - # make visuals of the vectors true, since this is what this function does / is intended for. - options.vector_traj == True + # make visuals of the vectors true, since this is what this function does + options.vector_traj = True # sort out names for files path_name = str(gro_files[0]).split()[-1].split('.')[-2] @@ -36,25 +34,27 @@ def run_single(universe_files, protein_info, calc_method, vector_sel_resids): u = mda.Universe(universe_files[0]) # set the resids gathered from the stride file - # will be used to select the centre of mass, i.e this is the secondary structure of the protein + # will be used to select the centre of mass, i.e this is + # the secondary structure of the protein sel_resids = ' or resid '.join(map(str, protein_info['resids'])) sel = "name CA and (resid " + str(sel_resids) + ")" - # Calculate the raw principal axes - these will be used as a reference for the user - # as such, we will not need to calculate any angles and just write the vectors out. + # Calculate the raw principal axes - these will be used + # as a reference for the user as such, we will not need + # to calculate any angles and just write the vectors out. pa_array = get_principal_axes(u, sel) - # Make a row wise version for the direction cosine matrix calculation (see the func for - # why this is needed) + # Make a row wise version for the direction cosine + # matrix calculation (see the func for why this is needed) pa_array_row_wise = pa_array.T - ax1 = pa_array_row_wise[0] # i.e. the roll axis - ax2 = pa_array_row_wise[1] # i.e. the pitch axis - ax3 = pa_array_row_wise[2] # i.e. the yaw axis + ax1 = pa_array_row_wise[0] # i.e. the roll axis + ax2 = pa_array_row_wise[1] # i.e. the pitch axis + ax3 = pa_array_row_wise[2] # i.e. the yaw axis - ### Write out the PAs ### + # Write out the PAs # create coordinates array coord = np.array(u.select_atoms(sel).atoms.positions, float) @@ -62,17 +62,29 @@ def run_single(universe_files, protein_info, calc_method, vector_sel_resids): # compute geometric center center = np.mean(coord, 0) - vis_axes(vis='vmd', axes_data=[ax1, ax2, ax3], center=center, name=output_file_name) + vis_axes(vis='vmd', + axes_data=[ax1, ax2, ax3], + center=center, + name=output_file_name) -def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel_resids, states, run, ref_option, ref_basis): +def run_multiple( + universe_files, + protein_info, + skip_val, + calc_method, + vector_sel_resids, + states, + run, + ref_option, + ref_basis): euler_angle_store = [] print("Here we go!") # find the residues the user specified for the pitch, roll, and yaw - # these are used to make sure the principal axes vectors calculated + # these are used to make sure the principal axes vectors calculated # are always pointing in the right direction. user_resid_pitch_sel = vector_sel_resids.split(',')[0] user_resid_roll_sel = vector_sel_resids.split(',')[1] @@ -83,11 +95,13 @@ def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel output_file_name = path_name.split()[-1].split('/')[-1] - # define the current universe, this is accessed in a for loop. + # define the current universe, this is accessed in a for loop. u = mda.Universe(universe_files[run][0], universe_files[run][1]) - # initialise states dictionary - maybe run with if statement? - is this needed anymore? - states_dict = {'system ' + str(run): {'frame': [], 'pitch': [], 'contact': []}} + # initialise states dictionary - maybe run with if statement? + # - is this needed anymore? Commented out by PCB + # states_dict = {'system ' + str(run): {'frame': [], + # 'pitch': [], 'contact': []}} # Go through the current run for i, ts in enumerate(u.trajectory[::skip_val]): @@ -96,27 +110,31 @@ def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel print("Frame = ", ts.frame, ", Time = ", ts.time / 1000, "ns") # set the resids gathered from the stride file - # will be used to select the centre of mass, i.e this is the secondary structure of the protein + # will be used to select the centre of mass, i.e this + # is the secondary structure of the protein sel_resids = ' or resid '.join(map(str, protein_info['resids'])) sel = "name CA and (resid " + str(sel_resids) + ")" - # Define vectors - drawn from centre of mass of body to the resid chosen - + # Define vectors - drawn from centre of mass of body + # to the resid chosen # Define the initial pitch vector based on the users choice user_ax1 = list( - u.select_atoms("resid " + str(user_resid_pitch_sel) + " and name CA").atoms.positions[ - 0] - u.select_atoms(sel).center_of_mass()) + u.select_atoms("resid " + str(user_resid_pitch_sel) + + " and name CA").atoms.positions[ + 0] - u.select_atoms(sel).center_of_mass()) # Define the initial roll vector based on the users choice user_ax2 = list( - u.select_atoms("resid " + str(user_resid_roll_sel) + " and name CA").atoms.positions[ - 0] - u.select_atoms(sel).center_of_mass()) + u.select_atoms("resid " + str(user_resid_roll_sel) + + " and name CA").atoms.positions[ + 0] - u.select_atoms(sel).center_of_mass()) # Define the initial yaw vector based on the users choice user_ax3 = list( - u.select_atoms("resid " + str(user_resid_yaw_sel) + " and name CA").atoms.positions[ - 0] - u.select_atoms(sel).center_of_mass()) + u.select_atoms("resid " + str(user_resid_yaw_sel) + + " and name CA").atoms.positions[ + 0] - u.select_atoms(sel).center_of_mass()) # Normalise the user vectors as they will not be between 0 & 1 user_ax1 = user_ax1 / np.linalg.norm(user_ax1) @@ -124,23 +142,25 @@ def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel user_ax3 = user_ax3 / np.linalg.norm(user_ax3) ############################# - # # - # Calculate Euler Angles # + # # + # Calculate Euler Angles # # # ############################# - # if the user has selected a starting set of principal axes to ensure the PAs always point + # if the user has selected a starting set of principal + # axes to ensure the PAs always point # towards them, use these if calc_method == "user_pa": pa_array = get_principal_axes(u, sel) - # Make a row wise version for the direction cosine matrix calculation (see the func for - # why this is needed) + # Make a row wise version for the direction cosine + # matrix calculation (see the func for why this is needed) pa_array_row_wise = pa_array.T - # Since the way in which the principal axes are calculated in MDAnalysis we need - # to check that the principal axes at each frame are pointing in the same direction + # Since the way in which the principal axes are calculated + # in MDAnalysis we need to check that the principal axes + # at each frame are pointing in the same direction # as the one specified by the user, if it is then flip it: if np.dot(user_ax1, pa_array_row_wise[0, :]) < 0: @@ -156,23 +176,25 @@ def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel pa_array_row_wise[2, :] = pa_array_row_wise[2, :] * -1 ############################## - ### Get reference basis ### + # Get reference basis # ############################## if ref_option == 'first_frame': - # In the first frame we use this basis as the reference for all others - if i == 0: + # In the first frame we use this basis as the reference + # for all others + if i == 0: ref = pa_array_row_wise elif ref_option == 'user': - # read each line of the supplied file, corresponds to each basis vector + # read each line of the supplied file, corresponds + # to each basis vector - with open(ref_basis, 'r') as file: - a = [int(number) for number in file.readline().split(',')] - b = [int(number) for number in file.readline().split(',')] - c = [int(number) for number in file.readline().split(',')] + with open(ref_basis, 'r') as file: + a = [int(number) for number in file.readline().split(',')] + b = [int(number) for number in file.readline().split(',')] + c = [int(number) for number in file.readline().split(',')] ref = np.array([a, b, c]) @@ -182,57 +204,69 @@ def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel ref = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) ################################ - ### Calc DCM and Rot matrix ### + # Calc DCM and Rot matrix # ################################ - # Calculate the direction cosine matrix between the current orthonormal basis and the reference - dir_cosine_matrix = make_direction_cosine_matrix(ref=ref, axes_set=pa_array_row_wise) + # Calculate the direction cosine matrix between the current + # orthonormal basis and the reference + dir_cosine_matrix = make_direction_cosine_matrix( + ref=ref, + axes_set=pa_array_row_wise) # Create a rotation object from the direction cosine matrix rotation_obj = R.from_dcm(dir_cosine_matrix.T) ############################## - ### Calculate Euler Angles ### + # Calculate Euler Angles # ############################## - # Intrinisic rotation formalism used: Rotate about Z first, then Y', then X''. These correspond to Yaw, Pitch, and Roll (https://en.wikipedia.org/wiki/Euler_angles#Alternative_names) + # Intrinisic rotation formalism used: Rotate about Z first, + # then Y', then X''. These correspond to Yaw, Pitch, and Roll + # (https://en.wikipedia.org/wiki/Euler_angles#Alternative_names) euler_angles_array = rotation_obj.as_euler('ZYX', degrees=True) # Store the euler_angles_array euler_angle_store.append(euler_angles_array) - #testing + # testing # print("apply rotation to ref: ", rotation_obj.apply(ref)) # print(" ") # print("ref basis ", ref) # print(" ") - # print("rotated ref ", dir_cosine_matrix @ ref) # '@' used for matrix multiplication + # print("rotated ref ", dir_cosine_matrix @ ref) # '@' used + # for matrix multiplication # print(" ") # print(" basis at frame: ", pa_array_row_wise) # print(" ") - # Display calculated angles - yaw, pitch, roll = np.round(euler_angles_array[0], 2), np.round(euler_angles_array[1], 2), np.around(euler_angles_array[2], 2) + # Display calculated angles - PCB sllit this line for readability + yaw = np.round(euler_angles_array[0], 2) + pitch = np.round(euler_angles_array[1], 2) + roll = np.around(euler_angles_array[2], 2) - print('EULER ANGLES: Yaw = ' + str(yaw) + ', Pitch = ' + str(pitch) + ', Roll = ' + str(roll) + '\n') + print('EULER ANGLES: Yaw = ' + str(yaw) + ', Pitch = ' + + str(pitch) + ', Roll = ' + str(roll) + '\n') - ax1 = pa_array_row_wise[0] # i.e. the roll axis - ax2 = pa_array_row_wise[1] # i.e. the pitch axis - ax3 = pa_array_row_wise[2] # i.e. the yaw axis + ax1 = pa_array_row_wise[0] # i.e. the roll axis + ax2 = pa_array_row_wise[1] # i.e. the pitch axis + ax3 = pa_array_row_wise[2] # i.e. the yaw axis - # If the user has defined their own axes there is no need to flip them - # i.e. at every frame three vectors are drawn from the CoM to the three user defined residues (these may not be / probably won't be orthogonal) + # If the user has defined their own axes there is no need + # to flip them i.e. at every frame three vectors are drawn + # from the CoM to the three user defined residues (these may + # not be / probably won't be orthogonal) elif calc_method == 'user': ax1 = user_ax1 ax2 = user_ax2 ax3 = user_ax3 - # TODO - # calculate angles with Euler again? basis may not be orthogonal... + # TODO + # calculate angles with Euler again? basis may not + # be orthogonal... # Check if we want to write out the vectors to a pdb file - if options.vector_traj == True: + if options.vector_traj: # create coordinates array coord = np.array(u.select_atoms(sel).atoms.positions, float) @@ -240,9 +274,15 @@ def run_multiple(universe_files, protein_info, skip_val, calc_method, vector_sel # compute geometric center center = np.mean(coord, 0) - vis_axes(vis='vmd', axes_data=[ax1, ax2, ax3], center=center, name=output_file_name) + vis_axes(vis='vmd', + axes_data=[ax1, ax2, ax3], + center=center, + name=output_file_name) - np.save(output_file_name +'_euler_angles.npy', np.array(euler_angle_store)) + np.save( + output_file_name + '_euler_angles.npy', + np.array(euler_angle_store) + ) def init_parser(): @@ -250,49 +290,117 @@ def init_parser(): ''' Gather all of the relevant user information ''' parser = argparse.ArgumentParser( - description="Calculates the orientation of a user defined region of a protein") + description="""Calculates the orientation of a user + defined region of a protein""") parser.add_argument("-c", dest="gro_file_list", required=True, - help="The list of coordinate files [.gro], this takes the form of a text file with each file location starting on a new line.") + help="""The list of coordinate files [.gro], this + takes the form of a text file with each file location + starting on a new line.""") parser.add_argument("-f", dest="xtc_file_list", required=True, - help='The list of corrected trajectory files: pbc artifacts removed, no jumping across PBC. This takes the form of a text file with each file location starting on a new line.') - - parser.add_argument("-com_sel", dest="com_selection", type=str, required=True, - help='The range of resids to use for centre of mass calculation, in the form of A:B, where A and B are integers.') - - parser.add_argument("-method", dest="method", type=str, required=True, default="user_pa", - help="The vectors can be calculated by 1) a set of user defined vectors based on the centre of mass of the main selection and the alpha carbon (CA) of a specified residue OR 2) the method can be used in combination with (1) and use the principal axes of inertia. In either (1) or (2) the user must define a set of vectors that roughly correspond to the principal axes - this ensures that when calculated they always point in the direction specified by the users vectors. Options: user or user_pa. Default = user_pa") + help="""The list of corrected trajectory files: + pbc artifacts removed, no jumping across PBC. + This takes the form of a text file with each + file location starting on a new line.""") + + parser.add_argument("-com_sel", + dest="com_selection", + type=str, + required=True, + help="""The range of resids to use for centre of + mass calculation, in the form of A:B, where A + and B are integers.""") + + parser.add_argument("-method", + dest="method", + type=str, + required=True, + default="user_pa", + help="""The vectors can be calculated by + 1) a set of user defined vectors based on the + centre of mass of the main selection and the + alpha carbon (CA) of a specified residue OR 2) the + method can be used in combination with (1) and use + the principal axes of inertia. In either (1) or + (2) the user must define a set of vectors that + roughly correspond to the principal axes - this + ensures that when calculated they always point + in the direction specified by the users vectors. + Options: + user or user_pa. Default = user_pa""") parser.add_argument("-n", dest="num_of_proteins", type=int, required=True, - help='Number of protein copies in the system, default 1.') + help="""Number of protein copies in the system, + default 1.""") parser.add_argument("-skip", dest="skip", type=int, default=1, help="The number of frames to skip, default 1.") parser.add_argument("-vtraj", dest="vector_traj", type=bool, default=False, - help="Set to True if you want a trajectory of the vectors, default False.") + help="""Set to True if you want a trajectory + of the vectors, default False.""") - parser.add_argument("-res_vector_sel", dest="res_vector_sel", type=str, default=None, - help="The resids of the residues to use for the roll, pitch, and yaw calculation respectively: in the form A, B, C.") + parser.add_argument("-res_vector_sel", + dest="res_vector_sel", + type=str, + default=None, + help="""The resids of the residues to use for + the roll, pitch, and yaw calculation + respectively: in the form A, B, C.""") parser.add_argument("-stride", dest="stride_file", type=str, - help="The name of the stride file to read, a .txt file. This will be used in combination with the -com_sel selection to only choose those residues involved in secondary structure. If using the 'user_pa' method (see -method) this option must be supplied.") + help="""The name of the stride file to + read, a .txt file.This will be used in combination + with the -com_sel selection to only choose those + residues involved in secondary structure. + If using the 'user_pa' method (see -method) this + option must be supplied.""") parser.add_argument("-pa_only", dest="pa_single", type=bool, - help="If set to True a principal component calculation will be carried out and written to a .pdb file, this is to help in selecting the appropriate residues for a run. Default False") + help="""If set to True a principal component + calculation will be carried out and written to + a .pdb file, this is to help in selecting the + appropriate residues for a run. Default False""") parser.add_argument("-nprocs", dest="nprocs", type=int, default=1, - help="Number of processes to use, default=1.") - - parser.add_argument("-ref_option", dest="ref_option", type=str, default="standard", - help="Choice of what basis of vectors to use as a reference, from which the Euler angles will be calcualted. Permitted chocies are: 'first_frame', angles will be calculated in reference to the PAs calculated in the first frame. 'user', angles will be calculated in reference to a user defined set of vectors. 'standard' (recommended) where the standard is x, y, z = [1,0,0], [0,1,0], [0,0,1]. default = 'standard'.") + help="""Number of processes to use, default=1.""") + + parser.add_argument("-ref_option", + dest="ref_option", + type=str, + default="standard", + help="""Choice of what basis of vectors to use + as a referencefrom which the Euler angles will + be calcualted. Permitted chocies are: 'first_frame', + angles will be calculated in reference to the + PAs calculated in the first frame. 'user', angles + will be calculated in reference to a user defined + set of vectors. 'standard' (recommended) + where the standard is x, y, z = + [1,0,0], [0,1,0], [0,0,1]. default = 'standard'.""") parser.add_argument("-ref_basis", dest="ref_basis", type=str, - help="To be used in combination with 'user' if used (see -ref_option). The basis vectors to be used as a reference, if not passed the default will be used (see -ref_option). This should be a .txt file with the x, y, z coordinates on each line.") - - parser.add_argument("-sec_struc_choice", dest="sec_struc_choice", default=['strand', '310helix', 'alphahelix'], - help="A file containing the choice of secondary structure to use in the calculation of the centre of mass. If using the 'user_pa' method (see -method) this option must be supplied. Valid choices include: 'strand', '310helix', or 'alphahelix'. In the file these must be comma separated and have no whitespace between them. e.g. strand,310helix") + help="""To be used in combination with 'user' + if used (see -ref_option). The basis vectors to be + used as a reference, if not passed the default + will be used (see -ref_option). This should + be a .txt file with the + x, y, z coordinates on each line.""") + + parser.add_argument("-sec_struc_choice", + dest="sec_struc_choice", + default=['strand', + '310helix', + 'alphahelix'], + help="""A file containing the choice of secondary + structure to use in the calculation of the centre + of mass. If using the 'user_pa' method (see -method) + this option must be supplied. Valid choices include: + 'strand', '310helix', or 'alphahelix'. In the file + these must be comma separated and have no whitespace + between them. + e.g. strand, 310helix""") return parser.parse_args() @@ -302,7 +410,7 @@ def init_parser(): # Get the users options options = init_parser() - # Initialise file lists + # Initialise file lists gro_files = [] xtc_files = [] systems = [] @@ -316,68 +424,80 @@ def init_parser(): xtc_files = f.read().splitlines() # populate the systems list with each element a .gro and .xtc file - # this assumes the list supplied is ordered as sequential repeats. + # this assumes the list supplied is ordered as sequential repeats. for i in range(len(gro_files)): systems.append([gro_files[i], xtc_files[i]]) ###################################################### - ### Get information about protein(s) in the system ### + # Get information about protein(s) in the system # ###################################################### - # Get the start and end of the protein based on the users selection for the centre of mass + # Get the start and end of the protein based + # on the users selection for the centre of mass start_of_protein = int(options.com_selection.split()[-1].split(':')[0]) end_of_protein = int(options.com_selection.split()[-1].split(':')[1]) - # Load a temp universe, this is to get resids and protein lengths etc to be used in the main loop - # We assume that the user is analysing multiple repeats of the SAME system i.e. multiple MD runs - temp_universe = get_universe(systems[0][0]) # just the coordinate file - + # Load a temp universe, this is to get resids and protein + # lengths etc to be used in the main loop + # We assume that the user is analysing multiple repeats + # of the SAME system i.e. multiple MD runs + temp_universe = get_universe(systems[0][0]) # just the coordinate file + # get resids and length of protein resid_list = temp_universe.select_atoms("protein").residues.resids prot_sel_length = len( - temp_universe.select_atoms("resid " + str(start_of_protein) + ":" + str(end_of_protein) + " and name CA")) + temp_universe.select_atoms("resid " + str(start_of_protein) + + ":" + str(end_of_protein) + + " and name CA")) - # Initialise dictionary that holds information for protein (resids and angles) - protein_info = read_stride(stride_file=options.stride_file, protein_sel_length=int(prot_sel_length), sec_struc_choice=options.sec_struc_choice) + # Initialise dictionary that holds information for + # protein (resids and angles) + protein_info = read_stride(stride_file=options.stride_file, + protein_sel_length=int(prot_sel_length), + sec_struc_choice=options.sec_struc_choice) ##################### - ## ## - ## RUN ## - ## ## + # # + # RUN # + # # ##################### - if options.pa_single: # If the user just wants to find the principal axes + if options.pa_single: # If the user just wants to find the principal axes single_system = systems[0] print("here") print(systems[0]) - + run_single(universe_files=single_system, - protein_info=protein_info, - calc_method=options.method, - vector_sel_resids=options.res_vector_sel) + protein_info=protein_info, + calc_method=options.method, + vector_sel_resids=options.res_vector_sel) else: - # Run analysis - use multiple processes to run each system at the same time + # Run analysis - use multiple processes to + # run each system at the same time print("Pool Execution") start = time.time() - results = Parallel(n_jobs=options.nprocs, - verbose=3, - backend="multiprocessing")( - delayed(run_multiple)(universe_files=systems, - protein_info=protein_info, - skip_val=options.skip, - calc_method=options.method, - vector_sel_resids=options.res_vector_sel, - states=None, - run=i, - ref_option=options.ref_option, - ref_basis=options.ref_basis) for i, system in enumerate(systems)) + results = Parallel( + n_jobs=options.nprocs, + verbose=3, + backend="multiprocessing")( + delayed(run_multiple)( + universe_files=systems, + protein_info=protein_info, + skip_val=options.skip, + calc_method=options.method, + vector_sel_resids=options.res_vector_sel, + states=None, + run=i, + ref_option=options.ref_option, + ref_basis=options.ref_basis) for i, system in enumerate(systems) + ) delta = time.time() - start delta = delta / 60.0 diff --git a/orientation/setup_system.py b/orientation/setup_system.py index 99a7e86..86d4913 100644 --- a/orientation/setup_system.py +++ b/orientation/setup_system.py @@ -1,5 +1,5 @@ import MDAnalysis as mda -import numpy as np + def get_universe(gro_file, traj_file=None): @@ -7,7 +7,7 @@ def get_universe(gro_file, traj_file=None): # print(gro_file, traj_file) - if traj_file != None: + if traj_file is not None: u = mda.Universe(gro_file, traj_file) @@ -20,47 +20,68 @@ def get_universe(gro_file, traj_file=None): def read_stride(stride_file, protein_sel_length, sec_struc_choice): ''' - This reads the output from running stride structure.pdb, this is used to identify beta sheets and alpha - helices. Due to flexible loops the calculated principal axes can differ so using more stable regions can give - less noisy data. - - Prior to this function the user must run "stride file.pdb > stride_file.txt" and then use this file for the -stride option. + This reads the output from running stride structure.pdb, + this is used to identify beta sheets and alpha + helices. Due to flexible loops the calculated principal + axes can differ so using more stable regions can give + less noisy data. + + Prior to this function the user must run "stride file.pdb + > stride_file.txt" and then use this file for the -stride option. ''' # TODO: work on a specific region of the protein as an option - sec_struc_dict = {'strand': 'E', '310helix' : 'G', 'alphahelix': 'H'} + sec_struc_dict = {'strand': 'E', '310helix': 'G', 'alphahelix': 'H'} with open(sec_struc_choice, 'r') as ss_file: - - choices = [str(choice).rstrip() for choice in ss_file.readline().split(',')] # rstrip() to remove trailing characters + + choices = [str(choice).rstrip() + for choice in ss_file.readline().split(',')] + # rstrip() to remove trailing characters resid_list = [] - # make a list of the types of secondary structure to include, this is done by referencing sec_struc_dict to get the one letter code as defined by / in the supplied stride file + # make a list of the types of secondary structure to include, this + # is done by referencing sec_struc_dict to get the one letter code + # as defined by / in the supplied stride file sec_struc_list = [sec_struc_dict[sec_feature] for sec_feature in choices] # print(sec_struc_list) - # Populate resid_list with the list of residues to use for the main calculation - done with reference to the secondary structure (i.e. ignore flexible loop regions) + # Populate resid_list with the list of residues to use for the main + # calculation - done with reference to the secondary structure + # (i.e. ignore flexible loop regions) with open(stride_file) as f: for line in f.readlines(): - if line.splitlines()[0][0] == 'A': # if at the asignment (ASG) part of the stride file + # if at the asignment (ASG) part of the stride file + if line.splitlines()[0][0] == 'A': + + if line.splitlines()[0][24] in sec_struc_list: + # Read the one letter code column of the stride file + res = (line.splitlines()[0][12], + line.splitlines()[0][13], + line.splitlines()[0][14]) + # get the resid (only works for up to 999 currently) + resid_list.append(int(''.join(res))) + # join them up to make the correct number and append - if line.splitlines()[0][24] in sec_struc_list: # Read the one letter code column of the stride file - res = (line.splitlines()[0][12], line.splitlines()[0][13], line.splitlines()[0][14]) # get the resid (only works for up to 999 currently) - resid_list.append(int(''.join(res))) # join them up to make the correct number and append + # Make the dictionary with the relevant resids and empty + # lists to store the Euler angles later for each protein in the system + protein_dict = {'resids': [], + 'angle_pa1': [], + 'angle_pa2': [], + 'angle_pa3': []} - # Make the dictionary with the relevant resids and empty lists to store the Euler angles later for each protein in the system - protein_dict = {'resids': [],'angle_pa1': [],'angle_pa2': [],'angle_pa3': []} - - protein_dict['resids'] = [t for t in resid_list if 1 <= t <= protein_sel_length] + protein_dict['resids'] = [ + t for t in resid_list if 1 <= t <= protein_sel_length] # # Need to test below works on a multi chain system # else: - # chain_dict['chain ' + str(i)]['resids'] = [t for t in resid_list if - # (i * chain_length) + 1 <= t <= ((i+1) * chain_length)] + # chain_dict['chain ' + str(i)]['resids'] = + # [t for t in resid_list if (i * chain_length) + # + 1 <= t <= ((i+1) * chain_length)] return protein_dict diff --git a/orientation/visuals.py b/orientation/visuals.py index 59f5125..a5504dd 100644 --- a/orientation/visuals.py +++ b/orientation/visuals.py @@ -1,8 +1,9 @@ scale_factor = 20 + def vis_axes(vis, axes_data, center, name): - ''' + ''' Visualise the principal axes being used for the calculation. vis : the format to write the axes out as (either 'vmd' or 'pymol') @@ -20,26 +21,28 @@ def vis_axes(vis, axes_data, center, name): output = open(str(name) + '_pa_vectors.pdb', 'a') for i in range(0, (3 * scale_factor)): - tmp = "ATOM {0:3d} CA ALA A {1:3d} {2:8.3f}{3:8.3f}{4:8.3f} 1.00 0.00\n".format(i, i, center[0] + (axis1[0] * i), - center[1] + (axis1[1] * i), - center[2] + (axis1[2] * i)) + tmp = "ATOM {0:3d} CA ALA A {1:3d} {2:8.3f}{3:8.3f}{4:8.3f}\ + 1.00 0.00\n".format(i, i, center[0] + (axis1[0] * i), + center[1] + (axis1[1] * i), + center[2] + (axis1[2] * i)) output.write(tmp) output.write("TER\n") for j in range(0, (2 * scale_factor)): - tmp2 = "ATOM {0:3d} CA ALA B {1:3d} {2:8.3f}{3:8.3f}{4:8.3f} 1.00 0.00\n".format(j, j, center[0] + (axis2[0] * j), - center[1] + (axis2[1] * j), - center[2] + (axis2[2] * j)) + tmp2 = "ATOM {0:3d} CA ALA B {1:3d} {2:8.3f}{3:8.3f}{4:8.3f}\ + 1.00 0.00\n".format(j, j, center[0] + (axis2[0] * j), + center[1] + (axis2[1] * j), + center[2] + (axis2[2] * j)) output.write(tmp2) output.write("TER\n") - for k in range(0, (1 * scale_factor)): - tmp3 = "ATOM {0:3d} CA ALA C {1:3d} {2:8.3f}{3:8.3f}{4:8.3f} 1.00 0.00\n".format(k, k, center[0] + (axis3[0] * k), - center[1] + (axis3[1] * k), - center[2] + (axis3[2] * k)) + tmp3 = "ATOM {0:3d} CA ALA C {1:3d} {2:8.3f}{3:8.3f}{4:8.3f}\ + 1.00 0.00\n".format(k, k, center[0] + (axis3[0] * k), + center[1] + (axis3[1] * k), + center[2] + (axis3[2] * k)) output.write(tmp3) output.write("TER\nENDMDL\n") @@ -60,7 +63,7 @@ def vis_axes(vis, axes_data, center, name): # the small vector is the third principal axis point3 = 1 * scale_factor * axis3 + center - #pymol_name = pdb_name.replace(".pdb", "_axes.pml") + # pymol_name = pdb_name.replace(".pdb", "_axes.pml") pymol_name = (name + "_pa_vectors.pml") with open(pymol_name, "w") as pymol_file: @@ -77,10 +80,13 @@ def vis_axes(vis, axes_data, center, name): cmd.load_cgo(axis2, 'axis2') cmd.load_cgo(axis3, 'axis3') cmd.set('cgo_line_width', 4) - """ % ( \ - center[0], center[1], center[2], point1[0], point1[1], point1[2], \ - center[0], center[1], center[2], point2[0], point2[1], point2[2], \ - center[0], center[1], center[2], point3[0], point3[1], point3[2])) + """ % ( + center[0], center[1], center[2], point1[0], + point1[1], point1[2], + center[0], center[1], center[2], point2[0], + point2[1], point2[2], + center[0], center[1], center[2], point3[0], + point3[1], point3[2])) # -------------------------------------------------------------------------- # create .pml script for nice rendering in Pymol diff --git a/requirements.txt b/requirements.txt index 8466747..ed35c41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ MDAnalysis numpy scipy +flake8 pytest pytest-cov codecov