Commit c2696ae

hdxms-datasets version 0.3.0 (#15)
* remove residue offset
* use cif structure in example
* assign filter result to variable
* add peterle et al test data
* delete test data
* store current known ids
* add hdx_id to spec
* add hdx_id to spec
* remove unused import of re from models.py
* only check for unique if state column is present
* update fields information
* pull out extra column conversion to function
* add converter for hxms format
* cleanup double check of dataset id
* add hxms format
* add loading of hxms format
* make license explicit requirement
* add hxms peptide format
* add mock intensity column if missing
* add frameslicer util
* generate residue dataframe from sequence
* bump narwhals requirement
* fix typo
* rename conversion example scripts
* add hxms file format example
* StructureView takes a mapping to relate peptides to structure
* custom serializers for filters
* add new StructureMapping model
* move to StructureMapping
* update to use StructureMapping model
* add cli docs
* cli docs tweaks
* update to reflect changes to argument vs prompt
* cleanup hxms file
* tweak template format
* refactor structure, publish to output dir
* add comment
* update folder name for better readability
* refactor publication and metadata handling in dataset definition
* validate for unique state names
* formatting
* Add MVP dataset builder
* add dataset builder docs
* update gitignore
* add clear data test button
* push test data file through backend
* add test data to frontend
* load test data in both front and backend
* fix enable/disable steps
* use computed getters
* limit uploaded structures to one
* remove 'confirmed format' references
* read and cache dataframes
* Enhance DataframeCache with LRU eviction and session tracking; add comprehensive tests for cache functionality
* add structure metadata section
* fix typing and unused import
* merge steps 2 and 3
* fixes in protein identifier names
* remove structure information section
* fix step name
* add structure mappings
* fix structure mapping chain input
* remove unused format selection, move other elements
* small tweaks
* add data filters depending on format
* v1 filters
* filters should be record
* fix filters in ui
* sort filter values
* add update peptide filter action to data store
* filter dynamx by protein, state, exposure
* add percentage deuterium to cli create
* mount static files for production
* mount health under /api
* script for starting in preview mode
* add session id in header in dev mode
* remove dataset builder
* add cli tests
* update secb dataset to 030 format
* remove intensity column from reference data
* rename vault to db
* allow dictionary mapping of residue numbers
* use mapping.map to map residue numbers from hdxms to structure
* return df and don't write it
* output to the same directory as input data dir
* add DHFR test data
* add n_replicates and n_clusters fields
* script for regenerating processed test data
* parameterize processing test
* we don't expect sequence after reading hdxs files
* move clear function to remote database
* add ecDHFR HXMS format data to readme
* try/except loading peptides
* import submit dataset from init
* feat: strict checking, including version number
* feat: update Python version matrix to include 3.14
* remove unused imports
* drop py3.10, add py3.14
* fix: don't use color in CLI test invocations
* update python versions, add uv lockfile
* update python versions
* merge lockfiles
* need only one lockfile per matrix
* update pinned requirements
* add uv lockfile to requirements
* fix: no color via env var
* strip ansi codes
1 parent 6399cd5 commit c2696ae


63 files changed: +76,929 −3,152 lines

.github/workflows/pin_requirements.yml (4 additions, 2 deletions)

@@ -10,7 +10,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.11", "3.12", "3.13", "3.14"]
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -29,11 +29,13 @@ jobs:
         with:
           name: req-artifact-${{ matrix.os }}-${{ matrix.python-version }}
           path: requirements-${{ matrix.os }}-${{ matrix.python-version }}.txt
+
+
   merge:
     runs-on: ubuntu-latest
     needs: generate-requirements
     steps:
-      - name: Merge Artifacts
+      - name: Merge Requirements Artifacts
        uses: actions/upload-artifact/merge@v4
        with:
          name: all-requirements

.github/workflows/pytest.yml (1 addition, 1 deletion)

@@ -6,7 +6,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.11", "3.12", "3.13", "3.14"]
     runs-on: ubuntu-latest
     steps:
       - name: Check out repository

.gitignore (13 additions, 1 deletion)

@@ -126,4 +126,16 @@ __datasets/
 dev/
 
 # unpublished datasets
-datasets_private/
+datasets_private/
+
+# Node
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+dist/
+dist-ssr/
+*.local
+
+
+.claude

docs/cli.md (154 additions, new file)

# Command Line Interface (CLI)

The `hdxms-datasets` package provides a command-line interface to help you create and manage HDX-MS datasets.

## Installation

First, install the package with the CLI dependencies:

```bash
pip install -e .
```

After installation, the `hdxms-datasets` command will be available in your terminal.

## Commands

### `hdxms-datasets create`

Create a new HDX-MS dataset with a unique ID and template script.

**Basic usage:**

```bash
hdxms-datasets create
```

This will:

1. Generate a unique HDX dataset ID (e.g., `HDX_A1B2C3D4`)
2. Create a new directory in the current directory: `<HDX_ID>/`
3. Generate a template `create_dataset.py` script with configuration
4. Create a `data/` subdirectory for your raw data files
5. Generate a `README.md` with quick start instructions

**Options:**

- `--num-states, -n INTEGER`: Number of protein states (default: 1)
- `--format, -f CHOICE`: Data format: OpenHDX, DynamX_v3_state, DynamX_v3_cluster, or HDExaminer (default: OpenHDX)
- `--ph FLOAT`: Experimental pH (default: 7.5)
- `--temperature, -t FLOAT`: Temperature in Kelvin (default: 293.15)
- `--database-dir, -d PATH`: Path to an existing database directory to check for ID conflicts
- `--help`: Show help message

**Examples:**

```bash
# Create with defaults (OpenHDX, 1 state, pH 7.5, 20°C)
hdxms-datasets create

# Create with custom parameters
hdxms-datasets create --num-states 2 --format DynamX_v3_state --ph 8.0 --temperature 298.15

# Using short flags
hdxms-datasets create -n 3 -f HDExaminer --ph 7.0 -t 293.15

# Check for ID conflicts with an existing database
hdxms-datasets create --database-dir ~/hdx-database/datasets
```

## Configuration via Arguments

All dataset configuration is specified via command-line arguments:

- **Number of states** (`--num-states`): How many different protein states you measured (default: 1)
- **Data format** (`--format`): Which software generated your data (default: OpenHDX)
  - `OpenHDX` - OpenHDX format
  - `DynamX_v3_state` - DynamX state files
  - `DynamX_v3_cluster` - DynamX cluster files
  - `HDExaminer` - HDExaminer files
- **pH** (`--ph`): Experimental pH value (default: 7.5)
- **Temperature** (`--temperature`): Temperature in Kelvin (default: 293.15 K = 20°C)

## Workflow Example

```bash
# Step 1: Create a new dataset with custom parameters
$ hdxms-datasets create --num-states 2 --format DynamX_v3_state --ph 8.0

✓ Generated new dataset ID: HDX_A1B2C3D4
============================================================
✓ Dataset template created successfully!
============================================================

Dataset ID:  HDX_A1B2C3D4
Location:    C:\Users\username\HDX_A1B2C3D4
Format:      DynamX_v3_state
States:      2
pH:          8.0
Temperature: 293.15 K (20.0°C)

Next steps:
1. cd HDX_A1B2C3D4
2. Place your data files in the data/ directory
3. Edit create_dataset.py with your specific information
4. python create_dataset.py

# Step 2: Navigate to the new directory
$ cd HDX_A1B2C3D4

# Step 3: Copy your data files
$ copy C:\path\to\my\data.csv data\

# Step 4: Edit the template script
$ notepad create_dataset.py
# Edit the file with your specific information:
# - Replace protein sequences
# - Update data file names
# - Add author information
# - Add publication details

# Step 5: Run the script to create your dataset
$ python create_dataset.py
✓ Dataset submitted successfully with ID: HDX_A1B2C3D4
  Dataset location: C:\Users\username\HDX_A1B2C3D4\dataset\HDX_A1B2C3D4
```

## Generated Template Structure

After running `hdxms-datasets create`, you'll have:

```
HDX_A1B2C3D4/
├── create_dataset.py   # Template script to edit
├── README.md           # Quick start guide
└── data/               # Directory for your raw data files
```

The `create_dataset.py` template includes:

- Clearly marked sections to edit
- Inline comments explaining each field
- A list-based structure for protein states and peptides (flexible and easy to extend)
- Pre-configured pH and temperature values from your command-line arguments
- Example values to guide you
- Automatic sequence verification
- Dataset submission code

Please note that this template is not exhaustive; other metadata fields may be used depending on your dataset's requirements. A rough sketch of such a template is shown below.
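The following sketch gives a sense of the generated script's shape. It is not the verbatim template: the model names (`Publication`, `Author`, `DatasetMetadata`, `ProteinIdentifiers`, `Structure`, `Peptides`, `PeptideFormat`, `HDXDataSet`) all appear in this commit's example-script diff, but the import paths, placeholder values, and overall layout here are assumptions.

```python
# Hypothetical sketch of a generated create_dataset.py; not the verbatim
# template. Model names come from this commit's example script; import
# paths and placeholder values are assumptions.
from pathlib import Path

from hdxms_datasets import (  # assumed top-level exports
    Author,
    DatasetMetadata,
    HDXDataSet,
    Peptides,
    PeptideFormat,
    ProteinIdentifiers,
    Publication,
    Structure,
)

data_dir = Path(__file__).parent / "data"

# --- EDIT: authorship, publication, and license ---
metadata = DatasetMetadata(
    authors=[Author(name="Your Name", affiliation="Your Institution")],
    publication=Publication(title="...", doi="...", url="..."),
    license="CC0",  # the preferred / default license
)

# --- EDIT: protein identifiers and structure ---
protein_info = ProteinIdentifiers(
    uniprot_accession_number="P68082",  # example value
    uniprot_entry_name="MYG_HORSE",     # example value
)
structure = Structure(
    data_file=data_dir / "structure.cif",
    format="cif",
    pdb_id="1AZI",  # example value
)

# --- EDIT: one Peptides entry per measurement ---
peptides = Peptides(
    data_file=data_dir / "peptides.csv",
    data_format=PeptideFormat.OpenHDX,        # pre-filled from --format
    deuteration_type="partially_deuterated",  # assumed literal
    pH=7.5,              # pre-filled from --ph
    temperature=293.15,  # pre-filled from --temperature
)

# --- EDIT: assemble states and submit ---
dataset = HDXDataSet(
    states=[...],  # one state object per protein state (see template comments)
    description="Short description of the dataset",
    metadata=metadata,
    protein_identifiers=protein_info,
    structure=structure,
)
```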
## Future Commands (Planned)

The CLI is designed to be extensible. Future commands may include:

- `hdxms-datasets validate`: Validate a dataset before submission
- `hdxms-datasets upload`: Upload a dataset to a remote database
- `hdxms-datasets export`: Export a dataset to different formats

## Getting Help

For more information about any command:

```bash
hdxms-datasets --help
hdxms-datasets create --help
```

docs/fields.md (16 additions, 1 deletion)

@@ -1,3 +1,9 @@
+# Fields
+
+This document describes the fields used in open-hdxms files. The fields are divided into required, optional, and calculated fields.
+
+Some fields can be either calculated from raw data (e.g. uptake) or provided directly.
+
 ### start (int)
 residue number of the first amino acid in the peptide
 
@@ -12,6 +18,7 @@ state label
 
 DynamX state/cluster name: State
 HDExaminer name: Protein State
+hxms name: PROTEIN_STATE
 
 ### replicate (str)
 Label for the replicate
@@ -64,7 +71,9 @@ DynamX name?? is this max or mean intensity?
 These fields can be present in open-hdxms files, but can also be calculated from the other fields.
 
 ### max_uptake (int)
-Theoretical maximum deuterium uptake for the peptide. Typically equal to the number of amide hydrogens, thus number of non-proline residues minus one.
+Theoretical maximum deuterium uptake for the peptide. Equal to the number of
+non-proline residues. Note that back-exchange is not considered here, including
+back-exchange of the N-terminal amide.
 
 
 ### uptake (float)
@@ -80,7 +89,13 @@ Standard deviation of the uptake value
 ## Calculated fields:
 These fields are derived from other fields defined in the above sections.
 
+### n_replicates
+Added after data aggregation.
+Total number of replicates that were aggregated together.
 
+### n_clusters
+Added after data aggregation.
+Total number of isotopic clusters that were aggregated together. When replicates include multiple isotopic clusters (different charge states), this value will be larger than n_replicates.
 
 ### frac_fd_control (float)
 Fractional deuterium uptake with respect to fully deuterated control sample
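The revised `max_uptake` definition above (number of non-proline residues, with no back-exchange correction) is straightforward to compute from a peptide sequence. A minimal, package-independent sketch in Python:

```python
def max_uptake(sequence: str) -> int:
    """Theoretical maximum deuterium uptake for a peptide, following the
    definition above: the number of non-proline residues. Back-exchange,
    including that of the N-terminal amide, is deliberately ignored."""
    return sum(1 for residue in sequence.upper() if residue != "P")


# A 9-residue peptide with no prolines vs. the same peptide with one proline:
assert max_uptake("HGVTVLTAL") == 9
assert max_uptake("HGVTVPTAL") == 8
```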

dynamx_state.pq

Binary file not shown (−23.2 KB).

Peterle et al. example script (file name not shown in this view): 18 additions, 24 deletions

@@ -138,23 +138,6 @@
 
 # %%
 
-pub = Publication(
-    title="Simple and Fast Maximally Deuterated Control (maxD) Preparation for Hydrogen-Deuterium Exchange Mass Spectrometry Experiments",
-    doi="10.1021/acs.analchem.2c01446",
-    url="https://pubs.acs.org/doi/10.1021/acs.analchem.2c01446",
-)
-
-# %%
-# Make sure to add the correct license for your dataset
-# If you are the author, you can choose any license you like
-# The preferred / default license is CC0
-metadata = DatasetMetadata(  # type: ignore[call-arg]
-    authors=[Author(name="Daniele Peterle", affiliation="Northeastern University")],
-    publication=pub,
-    license="CC BY-NC 4.0",
-    conversion_notes="Converted published Supplementary data",
-)
-
 protein_info = ProteinIdentifiers(
     uniprot_accession_number="P68082",
     uniprot_entry_name="MYG_HORSE",
@@ -173,10 +156,8 @@
 structure = Structure(
     data_file=data_dir / "1azi.cif",
     format="cif",
-    description="",
+    description="MYOGLOBIN (HORSE HEART) RECOMBINANT WILD-TYPE COMPLEXED WITH AZIDE",
     pdb_id="1AZI",
-    residue_offset=0,  # HDX data residue numbers match the PDB, no offset
-    auth_residue_numbers=False,  # HDX data residue numbers are RCSB numbering (not author or is the same)
 )
 
 # define the sequence in this protein state
@@ -217,20 +198,18 @@
     pH=7.1,
     temperature=20 + 273.15,
     d_percentage=90.0,
-    chain=["A"],
 )
 
 fd_peptides = Peptides(  # type: ignore[call-arg]
     data_file=data_dir / "1_Mb_fd_peptides.csv",
     data_format=PeptideFormat.OpenHDX,
     deuteration_type="fully_deuterated",
     d_percentage=90.0,
-    chain=["A"],
 )
 
 # %%
 # we can create a view of the structure and for example check peptide redundancy
-StructureView(structure).peptide_redundancy(pd_peptides)
+StructureView(structure).peptide_redundancy(pd_peptides.load())
 
 # %%
 # This dataset has only one state, which is WT
@@ -242,10 +221,25 @@
 
 # %%
 
+pub = Publication(
+    title="Simple and Fast Maximally Deuterated Control (maxD) Preparation for Hydrogen-Deuterium Exchange Mass Spectrometry Experiments",
+    doi="10.1021/acs.analchem.2c01446",
+    url="https://pubs.acs.org/doi/10.1021/acs.analchem.2c01446",
+)
+
+# Make sure to add the correct license for your dataset
+# If you are the author, you can choose any license you like
+# The preferred / default license is CC0
+
 dataset = HDXDataSet(  # type: ignore[call-arg]
     states=[state],
     description="1 Mb dataset from Peterle et al. 2022",
-    metadata=metadata,
+    metadata=DatasetMetadata(  # type: ignore[call-arg]
+        authors=[Author(name="Daniele Peterle", affiliation="Northeastern University")],
+        publication=pub,
+        license="CC BY-NC 4.0",
+        conversion_notes="Converted published Supplementary data",
+    ),
     protein_identifiers=protein_info,
     structure=structure,
 )
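The `residue_offset` and `auth_residue_numbers` fields removed above are superseded by the new `StructureMapping` model named in the commit message ("allow dictionary mapping of residue numbers"; "use mapping.map to map residue numbers from hdxms to structure"). The diff does not show `StructureMapping`'s actual signature, so the following is a concept-only sketch of dictionary-based residue mapping, not the library's API:

```python
# Concept sketch only: NOT the hdxms-datasets StructureMapping API.
# Illustrates mapping HDX-MS residue numbers onto structure numbering,
# either with a uniform offset or an explicit dictionary.
def map_residues(
    hdx_residues: list[int],
    mapping: dict[int, int] | None = None,
    offset: int = 0,
) -> list[int]:
    """Map HDX residue numbers to structure residue numbers."""
    if mapping is not None:
        # irregular numbering (e.g. gaps or renumbered regions)
        return [mapping[r] for r in hdx_residues]
    # common case: a constant shift between the two numbering schemes
    return [r + offset for r in hdx_residues]


print(map_residues([1, 2, 3], offset=23))                    # [24, 25, 26]
print(map_residues([1, 2, 3], mapping={1: 5, 2: 6, 3: 10}))  # [5, 6, 10]
```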
