diff --git a/.github/workflows/pip_installation.yml b/.github/workflows/pip_installation.yml index 1d7a0cc..f688aa8 100644 --- a/.github/workflows/pip_installation.yml +++ b/.github/workflows/pip_installation.yml @@ -13,7 +13,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macOS-latest, windows-latest] + # os: [ubuntu-latest, macOS-latest, windows-latest] + os: [ubuntu-latest, windows-latest] steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 @@ -38,7 +39,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macOS-latest, windows-latest] + # os: [ubuntu-latest, macOS-latest, windows-latest] + os: [ubuntu-latest, windows-latest] steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/publish_and_release.yml b/.github/workflows/publish_and_release.yml index dc7fea7..4c8b830 100644 --- a/.github/workflows/publish_and_release.yml +++ b/.github/workflows/publish_and_release.yml @@ -73,38 +73,38 @@ jobs: asset_path: release/one_click_linux_gui/dist/pydiaid_gui_installer_linux.deb asset_name: pydiaid_gui_installer_linux.deb asset_content_type: application/octet-stream - Create_MacOS_Release: - runs-on: macos-latest - needs: Create_Draft_On_GitHub - steps: - - name: Checkout code - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - - name: Conda info - shell: bash -l {0} - run: conda info - - name: Creating installer for MacOS - shell: bash -l {0} - run: | - cd release/one_click_macos_gui - . 
./create_installer_macos.sh - - name: Test installer for MacOS - shell: bash -l {0} - run: | - sudo installer -pkg release/one_click_macos_gui/dist/pydiaid_gui_installer_macos.pkg -target / - - name: Upload MacOS Installer - id: upload-release-asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} - asset_path: release/one_click_macos_gui/dist/pydiaid_gui_installer_macos.pkg - asset_name: pydiaid_gui_installer_macos.pkg - asset_content_type: application/octet-stream + # Create_MacOS_Release: + # runs-on: macos-latest + # needs: Create_Draft_On_GitHub + # steps: + # - name: Checkout code + # uses: actions/checkout@v2 + # - uses: conda-incubator/setup-miniconda@v2 + # with: + # auto-update-conda: true + # python-version: ${{ matrix.python-version }} + # - name: Conda info + # shell: bash -l {0} + # run: conda info + # - name: Creating installer for MacOS + # shell: bash -l {0} + # run: | + # cd release/one_click_macos_gui + # . 
./create_installer_macos.sh + # - name: Test installer for MacOS + # shell: bash -l {0} + # run: | + # sudo installer -pkg release/one_click_macos_gui/dist/pydiaid_gui_installer_macos.pkg -target / + # - name: Upload MacOS Installer + # id: upload-release-asset + # uses: actions/upload-release-asset@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # with: + # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} + # asset_path: release/one_click_macos_gui/dist/pydiaid_gui_installer_macos.pkg + # asset_name: pydiaid_gui_installer_macos.pkg + # asset_content_type: application/octet-stream Create_Windows_Release: runs-on: windows-latest needs: Create_Draft_On_GitHub diff --git a/README.md b/README.md index 96050af..a924f9a 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ In case of issues, check out the following links: --- ## Citations -Check out the [dia-PASEF publication](https://doi.org/10.1016/j.mcpro.2022.100279) and [synchro-PASEF publication](https://doi.org/10.1016/j.mcpro.2022.100489). +Check out the [optimal dia-PASEF](https://doi.org/10.1016/j.mcpro.2022.100279), [synchro-PASEF](https://doi.org/10.1016/j.mcpro.2022.100489) and [PASEF workflows and py_diAID](https://doi.org/10.1038/s41596-024-01104-w) publications. --- ## How to contribute diff --git a/misc/bumpversion.cfg b/misc/bumpversion.cfg index e158241..335e27b 100644 --- a/misc/bumpversion.cfg +++ b/misc/bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.30 +current_version = 0.0.40 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
diff --git a/pydiaid/__init__.py b/pydiaid/__init__.py index 756beda..87d73bc 100644 --- a/pydiaid/__init__.py +++ b/pydiaid/__init__.py @@ -2,7 +2,7 @@ __project__ = "pydiaid" -__version__ = "0.0.30" +__version__ = "0.0.40" __license__ = "Apache" __description__ = "An open-source Python package of the AlphaPept ecosystem" __author__ = "Mann Labs" diff --git a/pydiaid/gui.py b/pydiaid/gui.py index 55245d0..9c1c116 100644 --- a/pydiaid/gui.py +++ b/pydiaid/gui.py @@ -481,7 +481,7 @@ def __init__(self, start_server=False): name="py_diAID", github_url='https://github.com/MannLabs/pydiaid', ) - self.project_description = """#### py_diAID is a Python tool that automatically and optimally places DIA (Data-Independent Acquisition) window schemes for efficient precursor coverage. Using pre-acquired precursor information, it generates dia-PASEF, synchro-PASEF, and Orbitrap Astral DIA methods. The name diAID stands for Automated Isolation Design for DIA.\n Please cite: Skowronek, … , Mann, MCP, 2022 for dia-PASEF and \n Skowronek, … , Willems, Raether, Mann, MCP, 2023 for synchro-PASEF.""" + self.project_description = """#### py_diAID is a Python tool that automatically and optimally places DIA (Data-Independent Acquisition) window schemes for efficient precursor coverage. Using pre-acquired precursor information, it generates dia-PASEF, synchro-PASEF, and Orbitrap Astral DIA methods. 
The name diAID stands for Automated Isolation Design for DIA.\n Please cite: Skowronek et al., Mann, MCP, 2022 for optimal dia-PASEF methods, \n Skowronek et al., Willems, Raether, Mann, MCP, 2023 for synchro-PASEF, \n Skowronek et al., Mann, Nat Protoc, 2025 for PASEF workflows and py_diAID.""" self.manual_path = os.path.join( DOCS_PATH, diff --git a/pydiaid/loader.py b/pydiaid/loader.py index 8a14775..fd22966 100644 --- a/pydiaid/loader.py +++ b/pydiaid/loader.py @@ -31,7 +31,11 @@ def load_library( try: # Special case for DIANN single-run which needs the file path directly if analysis_software == 'DIANN single-run': - return __parse_diann_single_run(library_name, ptm_list, require_im) + if library_name.split(".")[-1] == "parquet": + analysis_software = 'DIANN library' + else: + return __parse_diann_single_run(library_name, ptm_list, require_im) + # For all other software, load the dataframe first dataframe = __load_dataframe_from_file(library_name) @@ -73,9 +77,67 @@ def __load_dataframe_from_file( if library_name.split(".")[-1] == "csv": return pd.read_csv(library_name, sep=',') + if library_name.split(".")[-1] == "parquet": + return pd.read_parquet(library_name, engine='fastparquet') else: return pd.read_csv(library_name, sep='\t') # .xls, .tsv, .txt +class ColumnMapper: + """Handles column name mapping across different software versions.""" + + # Define all possible column variants as class attributes + COLUMN_VARIANTS = { + 'decoy': ['decoy', 'Decoy', 'is_decoy'], + 'qvalue': ['QValue', 'Q.Value', 'q_value', 'Q_Value'], + 'mobility': [ + 'PrecursorIonMobility', + 'IonMobility', + 'Ion Mobility', + 'ion_mobility', + 'IM', + 'Mobility', + '1/K0' + ], + 'mz': ['PrecursorMz', 'Precursor.Mz', 'Mz', 'PrecursorMZ', 'Calibrated Observed M/Z'], + 'charge': ['PrecursorCharge', 'Precursor.Charge', 'Charge'], + 'protein': ['ProteinId', 'ProteinName', 'Protein.Names', 'Protein', 'Protein ID'], + 'modified_peptide': [ + 'ModifiedPeptideSequence', + 'ModifiedPeptide', + 
'Modified.Sequence', + 'Modified Sequence', + 'Modified Peptide' + ], + 'peptide': ['Peptide', 'PeptideSequence', 'Sequence'] + } + + def __init__(self, dataframe: pd.DataFrame): + """Initialize with a dataframe and map its columns.""" + self.df = dataframe + self.column_map = self._create_column_map() + + def _create_column_map(self) -> dict: + """Create mapping of standard names to actual column names in dataframe.""" + column_map = {} + for standard_name, variants in self.COLUMN_VARIANTS.items(): + found_col = next((col for col in variants if col in self.df.columns), None) + column_map[standard_name] = found_col + return column_map + + def get_column(self, standard_name: str) -> str: + """Get the actual column name for a standard column identifier.""" + return self.column_map.get(standard_name) + + def validate_required_columns(self, required_columns: list) -> None: + """Validate that all required columns exist.""" + missing = [col for col in required_columns if self.get_column(col) is None] + if missing: + raise ValueError(f"Required columns missing: {', '.join(missing)}") + + def has_column(self, standard_name: str) -> bool: + """Check if a standard column exists in the dataframe.""" + return self.get_column(standard_name) is not None + def __parse_alpha_pept( dataframe: pd.DataFrame, @@ -175,8 +237,8 @@ def __parse_ms_fragger( columns. Parameters: - dataframe (pd.DataFrame): imported output file from the analysis software - "MSFragger". + dataframe (pd.DataFrame): imported library or psm file from the analysis software + "MSFragger". Required columns (supports multiple naming variants) File format: .tsv, required columns: 'PrecursorMz', 'PrecursorIonMobility', 'PrecursorCharge', 'ProteinId', 'ModifiedPeptideSequence'. ptm_list (list): a list with identifiers used for filtering a specific dataframe column. @@ -186,19 +248,28 @@ def __parse_ms_fragger( pd.DataFrame: returns a pre-filtered data frame with unified column names. 
""" - im_col = 'PrecursorIonMobility' if 'PrecursorIonMobility' in dataframe.columns else None + mapper = ColumnMapper(dataframe) - if require_im and im_col is None: - raise Exception("Ion mobility data required but not found in MSFragger output") + required_columns = ['mz', 'charge', 'protein', 'modified_peptide'] + if require_im: + required_columns.append('mobility') + + mapper.validate_required_columns(required_columns) + + if mapper.has_column('peptide'): + peptide_col = mapper.get_column('peptide') + mod_peptide_col = mapper.get_column('modified_peptide') + dataframe[mod_peptide_col] = dataframe[mod_peptide_col].replace('', pd.NA) + dataframe[mod_peptide_col] = dataframe[mod_peptide_col].fillna(dataframe[peptide_col]) return library_loader( dataframe, ptm_list, - mz='PrecursorMz', - im=im_col, - charge='PrecursorCharge', - protein='ProteinId', - modified_peptide='ModifiedPeptideSequence' + mz=mapper.get_column('mz'), + im=mapper.get_column('mobility'), + charge=mapper.get_column('charge'), + protein=mapper.get_column('protein'), + modified_peptide=mapper.get_column('modified_peptide') ) @@ -313,40 +384,37 @@ def __parse_diann_lib( Parameters: dataframe (pd.DataFrame): imported library file from the analysis software - "DIANN". Required columns: - 'PrecursorMz', - 'IonMobility', - 'PrecursorCharge', - 'ProteinName', - 'ModifiedPeptide', - 'decoy', - 'QValue'. + "DIANN". Required columns (supports multiple naming variants) ptm_list (list): a list with identifiers used for filtering a specific dataframe column. require_im (bool): if True, requires ion mobility data; if False, makes ion mobility optional. Returns: pd.DataFrame: returns a pre-filtered data frame with unified column names. 
""" - # Filter out decoys and apply Q-value thresholdt + # Initialize column mapper + mapper = ColumnMapper(dataframe) + + # Check required columns + required_columns = ['decoy', 'qvalue', 'mz', 'charge', 'protein', 'modified_peptide'] + if require_im: + required_columns.append('mobility') + + mapper.validate_required_columns(required_columns) + + # Filter dataframe filtered_dataframe = dataframe[ - (dataframe['decoy'] == 0) & # Remove decoy entries - (dataframe['QValue'] <= 0.01) # Filter for 1% FDR + (dataframe[mapper.get_column('decoy')] == 0) & + (dataframe[mapper.get_column('qvalue')] <= 0.01) ] - - # Check if IM column exists - im_col = 'IonMobility' if 'IonMobility' in dataframe.columns else None - if require_im and im_col is None: - raise Exception("Ion mobility data required but not found in DIANN library") - return library_loader( filtered_dataframe, ptm_list, - mz='PrecursorMz', - im=im_col, - charge='PrecursorCharge', - protein='ProteinName', - modified_peptide='ModifiedPeptide' + mz=mapper.get_column('mz'), + im=mapper.get_column('mobility'), + charge=mapper.get_column('charge'), + protein=mapper.get_column('protein'), + modified_peptide=mapper.get_column('modified_peptide') ) diff --git a/pydiaid/oadia/method_generator.py b/pydiaid/oadia/method_generator.py index 2e96c6a..0bf9b75 100755 --- a/pydiaid/oadia/method_generator.py +++ b/pydiaid/oadia/method_generator.py @@ -901,17 +901,9 @@ def adjust_bin_boundaries(bins, phospho_enriched=False): start_value = bins[i][0] end_value = bins[i][1] - # Adjust start value (except for first bin) - if i == 0: - adjusted_start = start_value - else: - adjusted_start = find_closest_forbidden_zone(start_value, phospho_enriched) - - # Adjust end value (except for last bin) - if i == len(bins) - 1: - adjusted_end = end_value - else: - adjusted_end = find_closest_forbidden_zone(end_value, phospho_enriched) + # Adjust both values (including first and last bin) + adjusted_start = find_closest_forbidden_zone(start_value, 
phospho_enriched) + adjusted_end = find_closest_forbidden_zone(end_value, phospho_enriched) adjusted_bins.append([adjusted_start, adjusted_end]) diff --git a/pydiaid/synchropasef/method_creator.py b/pydiaid/synchropasef/method_creator.py index 289417b..0c81abe 100644 --- a/pydiaid/synchropasef/method_creator.py +++ b/pydiaid/synchropasef/method_creator.py @@ -373,6 +373,15 @@ def calculate_scan_area( return df_scan_area + + +def check_slope(params_list): + """Debug helper: print the slope (col4 - col1) / (col3 - col0) and the raw values of each parameter row; nothing is modified or returned""" + for i, params in enumerate(params_list): + col0, col1, col2, col3, col4 = params + slope = (col4 - col1) / (col3 - col0) + print(slope) + print(params) def generate_isolation_windows( @@ -438,17 +447,19 @@ def generate_isolation_windows( "2" ) + rounding_factor = 0 list_method_parameters = list() for index in range(len(mz_start_lower_IM)): list_temp = [ df_scan_area["lower_IM"].iloc[0], - round(mz_start_lower_IM[index], 1), - round(mz_start_lower_IM[index]+mz_width_lower_IM[index], 1), + np.round(mz_start_lower_IM[index], rounding_factor), + np.round(mz_start_lower_IM[index]+mz_width_lower_IM[index], rounding_factor), df_scan_area["upper_IM"].iloc[0], - round(mz_start_upper_IM[index], 1) + np.round(mz_start_upper_IM[index], rounding_factor) ] list_method_parameters.append(list_temp) - list_method_parameters + + # check_slope(list_method_parameters) df_method_parameters = pd.DataFrame( list_method_parameters, diff --git a/release/one_click_linux_gui/control b/release/one_click_linux_gui/control index 52c5841..eb90ed9 100644 --- a/release/one_click_linux_gui/control +++ b/release/one_click_linux_gui/control @@ -1,5 +1,5 @@ Package: pydiaid -Version: 0.0.30 +Version: 0.0.40 Architecture: all Maintainer: Mann Labs Description: py_diAID diff --git a/release/one_click_linux_gui/create_installer_linux.sh b/release/one_click_linux_gui/create_installer_linux.sh index 1a5d20f..3d8e6aa 100644 --- a/release/one_click_linux_gui/create_installer_linux.sh +++ 
b/release/one_click_linux_gui/create_installer_linux.sh @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_linux_gui # Make sure you include the required extra packages and always use the stable or very-stable options! -pip install "../../dist/pydiaid-0.0.30-py3-none-any.whl[stable]" +pip install "../../dist/pydiaid-0.0.40-py3-none-any.whl[stable]" # Creating the stand-alone pyinstaller folder pip install pyinstaller==4.10 diff --git a/release/one_click_macos_gui/Info.plist b/release/one_click_macos_gui/Info.plist index d963e35..f1a3f0b 100644 --- a/release/one_click_macos_gui/Info.plist +++ b/release/one_click_macos_gui/Info.plist @@ -9,9 +9,9 @@ CFBundleIconFile alpha_logo.icns CFBundleIdentifier - pydiaid.0.0.30 + pydiaid.0.0.40 CFBundleShortVersionString - 0.0.30 + 0.0.40 CFBundleInfoDictionaryVersion 6.0 CFBundleName diff --git a/release/one_click_macos_gui/create_installer_macos.sh b/release/one_click_macos_gui/create_installer_macos.sh index 491bc39..922c2b5 100644 --- a/release/one_click_macos_gui/create_installer_macos.sh +++ b/release/one_click_macos_gui/create_installer_macos.sh @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_macos_gui -pip install "../../dist/pydiaid-0.0.30-py3-none-any.whl[stable]" +pip install "../../dist/pydiaid-0.0.40-py3-none-any.whl[stable]" # Creating the stand-alone pyinstaller folder pip install pyinstaller==4.7 diff --git a/release/one_click_macos_gui/distribution.xml b/release/one_click_macos_gui/distribution.xml index eee9cc1..98fed75 100644 --- a/release/one_click_macos_gui/distribution.xml +++ b/release/one_click_macos_gui/distribution.xml @@ -1,6 +1,6 @@ - py_diAID 0.0.30 + py_diAID 0.0.40 diff --git a/release/one_click_windows_gui/create_installer_windows.sh b/release/one_click_windows_gui/create_installer_windows.sh index 93c3e3f..8209e71 100644 --- a/release/one_click_windows_gui/create_installer_windows.sh +++ 
b/release/one_click_windows_gui/create_installer_windows.sh @@ -7,6 +7,10 @@ cd ../.. rm -rf dist rm -rf build +conda init +conda config --remove channels defaults +conda config --add channels conda-forge + # Creating a conda environment conda create -n pydiaid_installer python=3.8 -y conda activate pydiaid_installer @@ -17,11 +21,14 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_windows_gui # Make sure you include the required extra packages and always use the stable or very-stable options! -pip install "../../dist/pydiaid-0.0.30-py3-none-any.whl[stable]" +pip install "../../dist/pydiaid-0.0.40-py3-none-any.whl[stable]" # Creating the stand-alone pyinstaller folder pip install pyinstaller==4.10 -pyinstaller ../pyinstaller/pydiaid.spec -y + +conda install -y scikit-optimize=0.9.0 numpy=1.23.5 openblas=0.3.23 scipy=1.10.1 pandas=1.3.4 matplotlib=3.6.0 + +pyinstaller ../pyinstaller/pydiaid.spec -y --clean conda deactivate # If needed, include additional source such as e.g.: diff --git a/release/one_click_windows_gui/pydiaid_innoinstaller.iss b/release/one_click_windows_gui/pydiaid_innoinstaller.iss index f0a110b..28484cd 100644 --- a/release/one_click_windows_gui/pydiaid_innoinstaller.iss +++ b/release/one_click_windows_gui/pydiaid_innoinstaller.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "py_diAID" -#define MyAppVersion "0.0.30" +#define MyAppVersion "0.0.40" #define MyAppPublisher "Max Planck Institute of Biochemistry, Mann Labs" #define MyAppURL "https://github.com/MannLabs/pydiaid" #define MyAppExeName "pydiaid_gui.exe" diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 68649cc..83075e0 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -12,3 +12,4 @@ jinja2==3.0.2 shapely==2.0.2 matplotlib==3.6.0 alphabase==1.2.1 +fastparquet==0.8.3