
Commit 910377e

2 parents: 34082e2 + c2696ae


50 files changed: +72238, -2765 lines

.github/workflows/pin_requirements.yml

Lines changed: 4 additions & 2 deletions
```diff
@@ -10,7 +10,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macOS-latest]
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.11", "3.12", "3.13", "3.14"]
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -29,11 +29,13 @@ jobs:
         with:
           name: req-artifact-${{ matrix.os }}-${{ matrix.python-version }}
           path: requirements-${{ matrix.os }}-${{ matrix.python-version }}.txt
+
+
   merge:
     runs-on: ubuntu-latest
     needs: generate-requirements
     steps:
-      - name: Merge Artifacts
+      - name: Merge Requirements Artifacts
        uses: actions/upload-artifact/merge@v4
        with:
          name: all-requirements
```

.github/workflows/pytest.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,7 +6,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.11", "3.12", "3.13", "3.14"]
     runs-on: ubuntu-latest
     steps:
       - name: Check out repository
```

.gitignore

Lines changed: 13 additions & 1 deletion
```diff
@@ -126,4 +126,16 @@ __datasets/
 dev/
 
 # unpublished datasets
-datasets_private/
+datasets_private/
+
+# Node
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+-error.log*
+dist/
+dist-ssr/
+*.local
+
+
+.claude
```

docs/cli.md

Lines changed: 154 additions & 0 deletions
# Command Line Interface (CLI)

The `hdxms-datasets` package provides a command-line interface to help you create and manage HDX-MS datasets.

## Installation

First, install the package with the CLI dependencies:

```bash
pip install -e .
```

After installation, the `hdxms-datasets` command will be available in your terminal.

## Commands

### `hdxms-datasets create`

Create a new HDX-MS dataset with a unique ID and template script.

**Basic usage:**

```bash
hdxms-datasets create
```

This will:

1. Generate a unique HDX dataset ID (e.g., `HDX_A1B2C3D4`)
2. Create a new directory in the current directory: `<HDX_ID>/`
3. Generate a template `create_dataset.py` script with configuration
4. Create a `data/` subdirectory for your raw data files
5. Generate a `README.md` with quick start instructions

**Options:**

- `--num-states, -n INTEGER`: Number of protein states (default: 1)
- `--format, -f CHOICE`: Data format - OpenHDX, DynamX_v3_state, DynamX_v3_cluster, HDExaminer (default: OpenHDX)
- `--ph FLOAT`: Experimental pH (default: 7.5)
- `--temperature, -t FLOAT`: Temperature in Kelvin (default: 293.15)
- `--database-dir, -d PATH`: Path to existing database directory to check for ID conflicts
- `--help`: Show help message

**Examples:**

```bash
# Create with defaults (OpenHDX, 1 state, pH 7.5, 20°C)
hdxms-datasets create

# Create with custom parameters
hdxms-datasets create --num-states 2 --format DynamX_v3_state --ph 8.0 --temperature 298.15

# Using short flags
hdxms-datasets create -n 3 -f HDExaminer --ph 7.0 -t 293.15

# Check for ID conflicts with existing database
hdxms-datasets create --database-dir ~/hdx-database/datasets
```

## Configuration via Arguments

All dataset configuration is specified via command-line arguments:

- **Number of states** (`--num-states`): How many different protein states you measured (default: 1)
- **Data format** (`--format`): Which software generated your data (default: OpenHDX)
  - `OpenHDX` - OpenHDX format
  - `DynamX_v3_state` - DynamX state files
  - `DynamX_v3_cluster` - DynamX cluster files
  - `HDExaminer` - HDExaminer files
- **pH** (`--ph`): Experimental pH value (default: 7.5)
- **Temperature** (`--temperature`): Temperature in Kelvin (default: 293.15 K = 20°C)

## Workflow Example

```bash
# Step 1: Create a new dataset with custom parameters
$ hdxms-datasets create --num-states 2 --format DynamX_v3_state --ph 8.0

✓ Generated new dataset ID: HDX_A1B2C3D4
============================================================
✓ Dataset template created successfully!
============================================================

Dataset ID: HDX_A1B2C3D4
Location: C:\Users\username\HDX_A1B2C3D4
Format: DynamX_v3_state
States: 2
pH: 8.0
Temperature: 293.15 K (20.0°C)

Next steps:
1. cd HDX_A1B2C3D4
2. Place your data files in the data/ directory
3. Edit create_dataset.py with your specific information
4. python create_dataset.py

# Step 2: Navigate to the new directory
$ cd HDX_A1B2C3D4

# Step 3: Copy your data files
$ copy C:\path\to\my\data.csv data\

# Step 4: Edit the template script
$ notepad create_dataset.py
# Edit the file with your specific information:
# - Replace protein sequences
# - Update data file names
# - Add author information
# - Add publication details

# Step 5: Run the script to create your dataset
$ python create_dataset.py
✓ Dataset submitted successfully with ID: HDX_A1B2C3D4
Dataset location: C:\Users\username\HDX_A1B2C3D4\dataset\HDX_A1B2C3D4
```

## Generated Template Structure

After running `hdxms-datasets create`, you'll have:

```
HDX_A1B2C3D4/
├── create_dataset.py   # Template script to edit
├── README.md           # Quick start guide
└── data/               # Directory for your raw data files
```

The `create_dataset.py` template includes:

- Clearly marked sections to edit
- Inline comments explaining each field
- List-based structure for protein states and peptides (flexible and easy to extend)
- Pre-configured pH and temperature values from your command-line arguments
- Example values to guide you
- Automatic sequence verification
- Dataset submission code

Please note that this template is not exhaustive and other metadata fields may be used depending on your dataset's requirements.

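To make concrete what the template asks you to fill in, the sketch below mirrors the metadata objects used in the `examples/from_custom_xlsx_file.py` change later in this commit. It is illustrative only: the `hdxms_datasets` import path and the exact constructor fields are assumptions based on that example, not the literal contents of the generated template.

```python
# Illustrative sketch, not the generated template itself.
# Import path and field names are assumed from examples/from_custom_xlsx_file.py.
from hdxms_datasets import Author, DatasetMetadata, Publication

pub = Publication(
    title="Title of the paper describing the data",
    doi="10.xxxx/placeholder",  # placeholder DOI
    url="https://doi.org/10.xxxx/placeholder",
)

# The preferred / default license is CC0; if you are the author you can
# choose any license you like.
metadata = DatasetMetadata(  # type: ignore[call-arg]
    authors=[Author(name="Your Name", affiliation="Your Institute")],
    publication=pub,
    license="CC0",
    conversion_notes="Describe how the raw data was converted",
)
```

The filled-in `metadata` is then passed to the `HDXDataSet` constructor together with your states, as shown in the example scripts further down this commit.
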
## Future Commands (Planned)

The CLI is designed to be extensible. Future commands may include:

- `hdxms-datasets validate`: Validate a dataset before submission
- `hdxms-datasets upload`: Upload a dataset to a remote database
- `hdxms-datasets export`: Export a dataset to different formats

## Getting Help

For more information about any command:

```bash
hdxms-datasets --help
hdxms-datasets create --help
```

docs/fields.md

Lines changed: 6 additions & 0 deletions
```diff
@@ -89,7 +89,13 @@ Standard deviation of the uptake value
 ## Calculated fields:
 These fields are derived from other fields defined in the above sections.
 
+### n_replicates
+added after data aggregation
+Total number of replicates that were aggregated together
 
+### n_clusters
+added after data aggregation
+Total number of isotopic clusters that were aggregated together. When replicates include multiple isotopic clusters (different charged states), this value will be larger than n_replicates.
 
 ### frac_fd_control (float)
 Fractional deuterium uptake with respect to fully deuterated control sample
```
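To make the relationship between these two counts concrete, the following toy example (illustrative only, not the package's aggregation code) uses polars, which the example scripts in this commit already rely on via `.to_polars()`. It assumes one input row per isotopic cluster, i.e. one row per replicate and charge state:

```python
import polars as pl

# One row per isotopic cluster: two replicates, each observed at two charge states.
clusters = pl.DataFrame(
    {
        "peptide": ["AEGDF"] * 4,
        "replicate": [1, 1, 2, 2],
        "charge": [1, 2, 1, 2],
        "uptake": [1.21, 1.25, 1.18, 1.22],
    }
)

aggregated = clusters.group_by("peptide").agg(
    pl.col("replicate").n_unique().alias("n_replicates"),  # -> 2
    pl.len().alias("n_clusters"),                          # -> 4
    pl.col("uptake").mean().alias("uptake"),
)
print(aggregated)
```

Here `n_clusters` (4) exceeds `n_replicates` (2) exactly because each replicate contributes one cluster per charge state.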

examples/from_custom_xlsx_file.py

Lines changed: 16 additions & 18 deletions
```diff
@@ -138,23 +138,6 @@
 
 # %%
 
-pub = Publication(
-    title="Simple and Fast Maximally Deuterated Control (maxD) Preparation for Hydrogen-Deuterium Exchange Mass Spectrometry Experiments",
-    doi="10.1021/acs.analchem.2c01446",
-    url="https://pubs.acs.org/doi/10.1021/acs.analchem.2c01446",
-)
-
-# %%
-# Make sure to add the correct licsense for your dataset
-# If you are the author, you can choose any license you like
-# The preferred / default license is CC0
-metadata = DatasetMetadata(  # type: ignore[call-arg]
-    authors=[Author(name="Daniele Peterle", affiliation="Northeastern University")],
-    publication=pub,
-    license="CC BY-NC 4.0",
-    conversion_notes="Converted published Supplementary data",
-)
-
 protein_info = ProteinIdentifiers(
     uniprot_accession_number="P68082",
     uniprot_entry_name="MYG_HORSE",
@@ -238,10 +221,25 @@
 
 # %%
 
+pub = Publication(
+    title="Simple and Fast Maximally Deuterated Control (maxD) Preparation for Hydrogen-Deuterium Exchange Mass Spectrometry Experiments",
+    doi="10.1021/acs.analchem.2c01446",
+    url="https://pubs.acs.org/doi/10.1021/acs.analchem.2c01446",
+)
+
+# Make sure to add the correct licsense for your dataset
+# If you are the author, you can choose any license you like
+# The preferred / default license is CC0
+
 dataset = HDXDataSet(  # type: ignore[call-arg]
     states=[state],
     description="1 Mb dataset from Peterle et al. 2022",
-    metadata=metadata,
+    metadata=DatasetMetadata(  # type: ignore[call-arg]
+        authors=[Author(name="Daniele Peterle", affiliation="Northeastern University")],
+        publication=pub,
+        license="CC BY-NC 4.0",
+        conversion_notes="Converted published Supplementary data",
+    ),
     protein_identifiers=protein_info,
     structure=structure,
 )
```

examples/from_hxms_file.py

Lines changed: 1 addition & 10 deletions
```diff
@@ -57,9 +57,6 @@
 # define the states, one state per file
 hxms_files = list(data_dir.glob("*.hxms"))
 hxms_files
-# %%
-hxms_file = hxms_files[2]
-hxms_file.stem.split("_")[-1]
 
 
 # %%
@@ -80,15 +77,11 @@ def get_ligand(fpath: Path) -> Optional[str]:
     return tag
 
 
-get_ligand(hxms_file)
-
 # %%
 
 # structure mapping: chain A, residue offset -15 to match sequence numbering
 mapping = StructureMapping(chain=["A"], residue_offset=-15)
 
-# %%
-
 
 # create a helper function to create a open-hdxms state object from the hdxms file
 def make_state(hxms_file: Path) -> State:
@@ -214,10 +207,8 @@ def make_state(hxms_file: Path) -> State:
 view
 
 # %%
-
-merged = merge_peptides(dataset.states[1].peptides)
-
 # compute uptake metrics (uptake, fractional deuterium), view result in peptide plot
+merged = merge_peptides(dataset.states[1].peptides)
 processed = compute_uptake_metrics(merged).to_polars()
 
 df_exposure = slice_exposure(processed)[5]
```