diff --git a/reproduction_QBM_VAE/README.md b/reproduction_QBM_VAE/README.md
new file mode 100644
index 0000000..1265614
--- /dev/null
+++ b/reproduction_QBM_VAE/README.md
@@ -0,0 +1 @@
+Code for Issue #70
diff --git a/reproduction_QBM_VAE/data_preprocess.py b/reproduction_QBM_VAE/data_preprocess.py
new file mode 100644
index 0000000..06e1022
--- /dev/null
+++ b/reproduction_QBM_VAE/data_preprocess.py
@@ -0,0 +1,204 @@
+import os
+import pickle
+import numpy as np
+from tqdm import tqdm
+import math
+
+# ================= DATA CONFIGURATION =================
+# Physics / Mass Spectrometry Constants
+MIN_MZ = 50.0
+MAX_MZ = 2500.0
+BIN_SIZE = 0.1
+# Dimension = (2500 - 50) / 0.1 + 1 = 24501
+VECTOR_DIM = int((MAX_MZ - MIN_MZ) / BIN_SIZE) + 1
+
+# Vocabulary (Must match Inference Config)
+AA_VOCAB = {
+    '<PAD>': 0, '<SOS>': 1, '<EOS>': 2,
+    'G': 3, 'A': 4, 'S': 5, 'P': 6, 'V': 7, 'T': 8, 'C': 9, 'L': 10,
+    'I': 11, 'N': 12, 'D': 13, 'Q': 14, 'K': 15, 'E': 16, 'M': 17,
+    'H': 18, 'F': 19, 'R': 20, 'Y': 21, 'W': 22,
+    'M(ox)': 23, 'C(cam)': 24, 'N(deam)': 25, 'Q(deam)': 26
+}
+
+# IO Settings
+CHUNK_SIZE = 20000  # Number of samples per shard
+OUTPUT_DIR = "./processed_data_qbm_chunks"
+RAW_DATA_PATH = "./data/raw_data.pkl"  # Point this to your source file
+
+
+class SpectrumProcessor:
+    """
+    Handles the discretization and normalization of Mass Spectrometry data.
+    """
+
+    @staticmethod
+    def bin_spectrum(mz_array, intensity_array):
+        """
+        Converts raw m/z and intensity arrays into a fixed-dimensional dense vector.
+        """
+        vector = np.zeros(VECTOR_DIM, dtype=np.float32)
+
+        # Normalize intensity (Base Peak Normalization)
+        if len(intensity_array) > 0:
+            max_intensity = np.max(intensity_array)
+            if max_intensity > 0:
+                intensity_array = intensity_array / max_intensity
+
+        # Vectorization / Binning
+        for mz, inten in zip(mz_array, intensity_array):
+            if mz < MIN_MZ or mz >= MAX_MZ:
+                continue
+
+            bin_idx = int((mz - MIN_MZ) / BIN_SIZE)
+            if 0 <= bin_idx < VECTOR_DIM:
+                # Merge peaks falling into the same bin (Max pooling strategy)
+                vector[bin_idx] = max(vector[bin_idx], inten)
+
+        return vector
+
+
+class SequenceTokenizer:
+    """
+    Handles encoding of peptide sequences into integer tokens.
+    """
+
+    @staticmethod
+    def tokenize(sequence):
+        """
+        Wraps the sequence with <SOS> and <EOS> tokens and maps residues to indices.
+        Returns: List[int] of token indices.
+        """
+        tokens = [AA_VOCAB['<SOS>']]
+
+        # Simple parsing logic (can be extended for complex modifications)
+        i = 0
+        n = len(sequence)
+        while i < n:
+            # Check for modifications like M(ox)
+            match = False
+            for mod_len in [7, 6, 5]:  # Try matching longest keys first
+                if i + mod_len <= n:
+                    sub = sequence[i: i + mod_len]
+                    if sub in AA_VOCAB:
+                        tokens.append(AA_VOCAB[sub])
+                        i += mod_len
+                        match = True
+                        break
+
+            if not match:
+                # Single amino acid
+                aa = sequence[i]
+                if aa in AA_VOCAB:
+                    tokens.append(AA_VOCAB[aa])
+                else:
+                    # Unknown AA strategy: skip the residue (alternatively, map it to a dedicated <UNK> token)
+                    pass
+                i += 1
+
+        tokens.append(AA_VOCAB['<EOS>'])
+        return tokens
+
+
+def save_chunk(data, split, part_idx):
+    """Serializes a data shard to disk."""
+    if not data:
+        return
+
+    filename = os.path.join(OUTPUT_DIR, f"{split}_part_{part_idx}.pkl")
+    try:
+        with open(filename, 'wb') as f:
+            pickle.dump(data, f)
+        print(f"[IO] Saved shard: {filename} ({len(data)} samples)")
+    except IOError as e:
+        print(f"[ERROR] Failed to save shard {filename}: {e}")
+
+
+def process_pipeline(raw_source):
+    """
+    Main ETL pipeline: Load -> Transform -> Shard -> Save.
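+
+    Output format (as produced by the code below): each shard is a pickled list of
+    dicts {'x': np.float32 array of shape (VECTOR_DIM,), 'y': List[int] token indices},
+    written to OUTPUT_DIR as {split}_part_{part_idx}.pkl with up to CHUNK_SIZE samples
+    per shard; samples are routed to 'train' or 'test' by a random 90/10 split.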
+ """ + os.makedirs(OUTPUT_DIR, exist_ok=True) + + # Check raw data existence + if not os.path.exists(raw_source): + # Fallback for demonstration if user hasn't configured raw path + print(f"[WARN] Raw data not found at {raw_source}. Generating synthetic dummy data for verification.") + # GENERATE DUMMY DATA (Remove this block in production) + dummy_data = [] + for _ in range(50000): + mz = np.random.uniform(100, 2000, 50) + inten = np.random.uniform(0, 1, 50) + seq = "PEPTIDESEQUENCE" + dummy_data.append({'m/z array': mz, 'intensity array': inten, 'sequence': seq}) + raw_iterator = dummy_data + else: + print(f"[PROC] Loading raw data from {raw_source}...") + with open(raw_source, 'rb') as f: + raw_iterator = pickle.load(f) + + print("[PROC] Starting vectorization and tokenization...") + + train_buffer = [] + test_buffer = [] + + # Split ratio configuration + test_ratio = 0.1 + + processed_count = 0 + train_chunk_idx = 0 + test_chunk_idx = 0 + + for item in tqdm(raw_iterator, desc="Processing"): + try: + # Extract fields (Adjust keys based on your raw data schema) + mz = item.get('m/z array') + inten = item.get('intensity array') + seq = item.get('sequence') + + if mz is None or seq is None: + continue + + # 1. Process Spectrum + x_vec = SpectrumProcessor.bin_spectrum(mz, inten) + + # 2. Process Sequence + y_indices = SequenceTokenizer.tokenize(seq) + + # Validation + if np.sum(x_vec) == 0 or len(y_indices) < 3: + continue + + sample = {'x': x_vec, 'y': y_indices} + + # Train/Test Split + if np.random.rand() < test_ratio: + test_buffer.append(sample) + if len(test_buffer) >= CHUNK_SIZE: + save_chunk(test_buffer, 'test', test_chunk_idx) + test_buffer = [] + test_chunk_idx += 1 + else: + train_buffer.append(sample) + if len(train_buffer) >= CHUNK_SIZE: + save_chunk(train_buffer, 'train', train_chunk_idx) + train_buffer = [] + train_chunk_idx += 1 + + processed_count += 1 + + except Exception as e: + # Fail silently on individual bad samples to keep pipeline running + continue + + # Flush remaining buffers + save_chunk(train_buffer, 'train', train_chunk_idx) + save_chunk(test_buffer, 'test', test_chunk_idx) + + print(f"[DONE] ETL Pipeline complete. Processed {processed_count} valid samples.") + + +if __name__ == "__main__": + # Ensure random seed for reproducibility during split + np.random.seed(42) + process_pipeline(RAW_DATA_PATH) \ No newline at end of file diff --git a/reproduction_QBM_VAE/dataset_loader.py b/reproduction_QBM_VAE/dataset_loader.py new file mode 100644 index 0000000..7a792fe --- /dev/null +++ b/reproduction_QBM_VAE/dataset_loader.py @@ -0,0 +1,86 @@ +import os +import glob +import pickle +import numpy as np +import torch +from torch.utils.data import IterableDataset, DataLoader +from torch.nn.utils.rnn import pad_sequence + + +class ChunkedDataset(IterableDataset): + """ + Implements an IterableDataset for efficient loading of sharded .pkl data files. + Designed to handle large-scale spectral data with limited memory footprint. + """ + + def __init__(self, data_dir, split_name, shuffle=False, max_files=None): + super(ChunkedDataset, self).__init__() + self.data_dir = data_dir + self.split_name = split_name + self.shuffle = shuffle + + # Locate all data shards + pattern = os.path.join(data_dir, f"{split_name}_part_*.pkl") + full_file_list = sorted(glob.glob(pattern)) + + if not full_file_list: + print(f"[WARN] No data shards found in {data_dir} for split '{split_name}'. 
Check preprocessing.") + self.file_list = [] + else: + # File selection strategy + if max_files is not None and max_files > 0: + self.file_list = full_file_list[:max_files] + print( + f"[INFO] Fast-mode active. Loading {len(self.file_list)}/{len(full_file_list)} shards for '{split_name}'.") + else: + self.file_list = full_file_list + print(f"[INFO] Full-mode active. Loading all {len(self.file_list)} shards for '{split_name}'.") + + def __iter__(self): + """Yields batches of (spectrum, sequence) pairs from disk.""" + current_list = list(self.file_list) + if self.shuffle: + np.random.shuffle(current_list) + + for file_path in current_list: + try: + with open(file_path, 'rb') as f: + data_chunk = pickle.load(f) + + # In-memory shuffle for the current chunk + if self.shuffle: + np.random.shuffle(data_chunk) + + for item in data_chunk: + # Feature extraction: Sparse matrix to dense tensor + if hasattr(item['x'], 'toarray'): + x_dense = item['x'].toarray().flatten().astype(np.float32) + else: + x_dense = item['x'].flatten().astype(np.float32) + + x_tensor = torch.from_numpy(x_dense) + y_tensor = torch.tensor(item['y'], dtype=torch.long) + + yield x_tensor, y_tensor + + except IOError as e: + print(f"[ERROR] Failed to read shard {file_path}: {e}") + continue + + +def collate_fn_pad(batch): + """ + Custom collator to handle variable-length peptide sequences. + Pads sequences with 0 () to the maximum length in the batch. + """ + xs, ys = zip(*batch) + xs_stacked = torch.stack(xs) + ys_padded = pad_sequence(ys, batch_first=True, padding_value=0) + return xs_stacked, ys_padded + + +def get_dataloader(data_dir, split_name, batch_size=32, shuffle=False, max_files=None): + """Factory function to instantiate the DataLoader pipeline.""" + dataset = ChunkedDataset(data_dir, split_name, shuffle=shuffle, max_files=max_files) + # pin_memory=False for CPU workloads to avoid overhead + return DataLoader(dataset, batch_size=batch_size, pin_memory=False, collate_fn=collate_fn_pad) \ No newline at end of file diff --git a/reproduction_QBM_VAE/models.py b/reproduction_QBM_VAE/models.py new file mode 100644 index 0000000..e1cfc40 --- /dev/null +++ b/reproduction_QBM_VAE/models.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# SDK Compatibility Layer +try: + from kaiwu.torch_plugin import RestrictedBoltzmannMachine +except ImportError: + print("[WARN] Kaiwu SDK not detected. 
Falling back to Dummy RBM implementation.") + + + class RestrictedBoltzmannMachine(nn.Module): + """Mock RBM for non-quantum environments.""" + + def __init__(self, num_visible, num_hidden, **kwargs): + super().__init__() + self.v_bias = nn.Parameter(torch.zeros(num_visible)) + + def energy(self, z): + return -(z * self.v_bias).sum(dim=1) + + +class PeptideEncoder(nn.Module): + """Maps high-dimensional mass spectra to latent logits.""" + + def __init__(self, input_dim, hidden_dim, latent_dim): + super().__init__() + self.fc1 = nn.Linear(input_dim, 2048) + self.fc2 = nn.Linear(2048, hidden_dim) + self.fc_logits = nn.Linear(hidden_dim, latent_dim) + self.norm1 = nn.LayerNorm(hidden_dim) + + def forward(self, x): + x = F.relu(self.fc1(x)) + x = F.dropout(x, 0.3) + x = F.relu(self.norm1(self.fc2(x))) + return self.fc_logits(x) + + +class PeptideDecoder(nn.Module): + """Reconstructs peptide sequences from latent states using GRU.""" + + def __init__(self, latent_dim, hidden_dim, vocab_size): + super().__init__() + self.latent_to_hidden = nn.Linear(latent_dim, hidden_dim) + self.embedding = nn.Embedding(vocab_size, 128) + self.gru = nn.GRU(128, hidden_dim, batch_first=True) + self.fc_out = nn.Linear(hidden_dim, vocab_size) + + def forward(self, z, target_seq): + hidden = self.latent_to_hidden(z).unsqueeze(0) + # Teacher forcing: Use ground truth previous token as input + dec_input = target_seq[:, :-1] + embedded = self.embedding(dec_input) + output, _ = self.gru(embedded, hidden) + prediction = self.fc_out(output) + return prediction + + +class PeptideQVAE(nn.Module): + """ + Quantum-Bounded Boltzmann Machine Variational Autoencoder (QBM-VAE). + Integrates a quantum-inspired energy-based prior into the VAE latent space. + """ + + def __init__(self, input_dim=24501, hidden_dim=512, latent_dim=64, vocab_size=30, kl_beta=0.001): + super().__init__() + self.latent_dim = latent_dim + self.kl_beta = kl_beta + + self.encoder = PeptideEncoder(input_dim, hidden_dim, latent_dim) + self.decoder = PeptideDecoder(latent_dim, hidden_dim, vocab_size) + + # Quantum Prior Initialization + self.rbm = RestrictedBoltzmannMachine( + num_visible=latent_dim, + num_hidden=latent_dim, + h_range=[-1, 1], + j_range=[-1, 1] + ) + self._debug_flag = False + + def reparameterize(self, logits): + """Bernoulli sampling relaxation (Gumbel-Softmax equivalent strategy).""" + if self.training: + return F.gumbel_softmax(logits, tau=1.0, hard=False) + else: + return (torch.sigmoid(logits) > 0.5).float() + + def compute_energy_safe(self, z): + """Wrapper to handle SDK parameter naming variations dynamically.""" + if hasattr(self.rbm, 'energy'): + return self.rbm.energy(z) + + # Reflection-based parameter discovery for backward compatibility + bias_param = None + possible_names = ['v_bias', 'visible_bias', 'bias_v', 'bv', 'b_v'] + for name in possible_names: + if hasattr(self.rbm, name): + bias_param = getattr(self.rbm, name) + break + + if bias_param is not None: + return -(z * bias_param).sum(dim=1) + + if not self._debug_flag: + print("[WARN] RBM parameter binding failed. 
Energy term set to zero (Dry-run mode).")
+            self._debug_flag = True
+
+        return torch.zeros(z.size(0), device=z.device)
+
+    def forward(self, x, target_seq):
+        logits = self.encoder(x)
+        z = self.reparameterize(logits)
+        seq_logits = self.decoder(z, target_seq)
+        energy = self.compute_energy_safe(z)
+        return seq_logits, z, logits, energy
+
+    def compute_loss(self, seq_logits, target_seq, logits_z, rbm_energy):
+        """
+        Calculates the variational objective:
+        Loss = Reconstruction_Error + beta * (Energy - Entropy)
+        """
+        target = target_seq[:, 1:]
+        # Alignment check
+        min_len = min(seq_logits.size(1), target.size(1))
+        seq_logits = seq_logits[:, :min_len, :]
+        target = target[:, :min_len]
+
+        # 1. Reconstruction Loss (Cross Entropy)
+        ce_loss = F.cross_entropy(
+            seq_logits.reshape(-1, seq_logits.size(-1)),
+            target.reshape(-1),
+            ignore_index=0
+        )
+
+        # 2. Regularization (Variational Free Energy approximation)
+        avg_energy = torch.mean(rbm_energy)
+        probs = torch.sigmoid(logits_z)
+        entropy = -torch.sum(probs * torch.log(probs + 1e-8) +
+                             (1 - probs) * torch.log(1 - probs + 1e-8), dim=1).mean()
+
+        prior_loss = avg_energy - entropy
+        total_loss = ce_loss + self.kl_beta * prior_loss
+
+        return total_loss, ce_loss, prior_loss
\ No newline at end of file
diff --git a/reproduction_QBM_VAE/predict.py b/reproduction_QBM_VAE/predict.py
new file mode 100644
index 0000000..48ea39f
--- /dev/null
+++ b/reproduction_QBM_VAE/predict.py
@@ -0,0 +1,132 @@
+import torch
+import os
+import numpy as np
+from models import PeptideQVAE
+from dataset_loader import get_dataloader
+
+# ================= INFERENCE CONFIG =================
+# Must match training configuration
+INPUT_DIM = 24501
+LATENT_DIM = 64
+HIDDEN_DIM = 512
+VOCAB_SIZE = 30
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+IO_CONFIG = {
+    'data_dir': "./processed_data_qbm_chunks",
+    'model_dir': "./models/Peptide_QVAE_Balanced",
+    'dataset_split': 'train',  # Set 'test' or 'train' for evaluation
+    'batch_size': 5
+}
+
+# Vocabulary Mapping
+AA_VOCAB = {
+    '<PAD>': 0, '<SOS>': 1, '<EOS>': 2,
+    'G': 3, 'A': 4, 'S': 5, 'P': 6, 'V': 7, 'T': 8, 'C': 9, 'L': 10,
+    'I': 11, 'N': 12, 'D': 13, 'Q': 14, 'K': 15, 'E': 16, 'M': 17,
+    'H': 18, 'F': 19, 'R': 20, 'Y': 21, 'W': 22,
+    'M(ox)': 23, 'C(cam)': 24, 'N(deam)': 25, 'Q(deam)': 26
+}
+IDX_TO_AA = {v: k for k, v in AA_VOCAB.items()}
+
+
+def resolve_latest_model(model_dir):
+    """Utility to fetch the latest checkpoint based on epoch numbering."""
+    try:
+        models = sorted([f for f in os.listdir(model_dir) if f.endswith('.pth')],
+                        key=lambda x: int(x.split('epoch')[1].split('.')[0]))
+        return os.path.join(model_dir, models[-1])
+    except Exception as e:
+        print(f"[ERROR] Could not resolve model from {model_dir}: {e}")
+        return None
+
+
+def decode_sequence(indices):
+    """Decodes tensor indices to a peptide string, handling special tokens."""
+    seq = []
+    for idx in indices:
+        idx = idx.item()
+        if idx == 2: break  # <EOS> token
+        if idx not in [0, 1]:  # Skip <PAD>, <SOS>
+            seq.append(IDX_TO_AA.get(idx, '?'))
+    return ''.join(seq)
+
+
+def generate_sequence_greedy(model, x, max_len=50):
+    """
+    Performs greedy decoding to generate peptide sequences.
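+
+    Steps (mirroring the implementation below):
+      1. Encode the spectrum and hard-threshold the sigmoid of the logits into a binary latent code z.
+      2. Initialize the decoder GRU hidden state from z via latent_to_hidden.
+      3. Starting from the <SOS> token (index 1), feed the argmax token back into the
+         decoder at each step until <EOS> (index 2) is emitted or max_len is reached.
+    Returns a list of token-index lists, one per sample in the batch.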
+ """ + model.eval() + with torch.no_grad(): + # Encode + logits = model.encoder(x) + z = (torch.sigmoid(logits) > 0.5).float() # Hard thresholding + + # Decode + hidden = model.decoder.latent_to_hidden(z).unsqueeze(0) + batch_size = x.size(0) + curr_input = torch.tensor([[1]] * batch_size, device=DEVICE) # token + + generated_seqs = [[] for _ in range(batch_size)] + finished = [False] * batch_size + + for _ in range(max_len): + embedded = model.decoder.embedding(curr_input) + output, hidden = model.decoder.gru(embedded, hidden) + pred_token = model.decoder.fc_out(output).argmax(dim=-1) + curr_input = pred_token + + for i in range(batch_size): + token = pred_token[i].item() + if token == 2: finished[i] = True + if not finished[i] and token != 1: + generated_seqs[i].append(token) + if all(finished): break + + return generated_seqs + + +if __name__ == "__main__": + latest_ckpt = resolve_latest_model(IO_CONFIG['model_dir']) + if not latest_ckpt: exit() + + print(f"[INFO] Loading checkpoint: {latest_ckpt}") + + # Data Loader + test_loader = get_dataloader( + IO_CONFIG['data_dir'], + IO_CONFIG['dataset_split'], + batch_size=IO_CONFIG['batch_size'], + max_files=1 + ) + + # Model Setup + model = PeptideQVAE(INPUT_DIM, HIDDEN_DIM, LATENT_DIM, VOCAB_SIZE).to(DEVICE) + model.load_state_dict(torch.load(latest_ckpt, map_location=DEVICE)) + model.eval() + + print("\n[EVAL] Inference Sample Comparison") + print("-" * 110) + print(f"{'Ground Truth Sequence':<40} | {'Predicted Sequence':<40} | {'Match Status'}") + print("-" * 110) + + sample_limit = 10 + processed_count = 0 + + with torch.no_grad(): + for x, y in test_loader: + x = x.to(DEVICE) + pred_indices = generate_sequence_greedy(model, x) + + for i in range(len(y)): + true_str = decode_sequence(y[i]) + pred_str = decode_sequence(torch.tensor(pred_indices[i])) + + status = "[MATCH]" if true_str == pred_str else "[DIFF]" + if len(pred_str) == 0: status = "[EMPTY]" + + print(f"{true_str:<40} | {pred_str:<40} | {status}") + + processed_count += 1 + if processed_count >= sample_limit: break + if processed_count >= sample_limit: break \ No newline at end of file diff --git a/reproduction_QBM_VAE/train_peptide.py b/reproduction_QBM_VAE/train_peptide.py new file mode 100644 index 0000000..f8ebaa2 --- /dev/null +++ b/reproduction_QBM_VAE/train_peptide.py @@ -0,0 +1,92 @@ +import os +import torch +import torch.optim as optim +from tqdm import tqdm +import kaiwu +from dataset_loader import get_dataloader +from models import PeptideQVAE + +# ================= CONFIGURATION ================= +# SDK Credentials +USER_ID = "91850531256946690" +SDK_CODE = "lTj5v0u67gyWsMfXxKAbiJPkkT6w7u" + +# Hyperparameters (Balanced Profile for CPU) +BATCH_SIZE = 32 +LEARNING_RATE = 1e-3 +NUM_EPOCHS = 30 +MAX_FILES = 1 # Shard limit for rapid iteration +HIDDEN_DIM = 512 +LATENT_DIM = 64 + +# IO Paths +DATA_DIR = "./processed_data_qbm_chunks" +SAVE_DIR = "./models/Peptide_QVAE_Balanced" + +# ================= INITIALIZATION ================= +print("[INIT] Initializing Kaiwu Quantum SDK...") +try: + kaiwu.license.init(user_id=USER_ID, sdk_code=SDK_CODE) + print("[INFO] License verified successfully.") +except Exception as e: + print(f"[WARN] License initialization failed: {e}") + +os.makedirs(SAVE_DIR, exist_ok=True) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f"[INIT] Runtime device: {device}") + +# Pipeline Setup +print(f"[DATA] Loading dataset (Limit: {MAX_FILES} shards)...") +train_loader = get_dataloader(DATA_DIR, 'train', 
batch_size=BATCH_SIZE, shuffle=True, max_files=MAX_FILES)
+
+print(f"[MODEL] Building architecture (H={HIDDEN_DIM}, L={LATENT_DIM})...")
+model = PeptideQVAE(
+    input_dim=24501,
+    hidden_dim=HIDDEN_DIM,
+    latent_dim=LATENT_DIM,
+    kl_beta=0.001
+).to(device)
+
+optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+# ================= TRAINING LOOP =================
+print(f"[TRAIN] Starting training loop for {NUM_EPOCHS} epochs.")
+
+for epoch in range(1, NUM_EPOCHS + 1):
+    model.train()
+    running_loss = 0.0
+    batch_counter = 0
+
+    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}")
+
+    for x, y in pbar:
+        x, y = x.to(device), y.to(device)
+        optimizer.zero_grad()
+
+        # Forward pass
+        seq_logits, z, logits_z, energy = model(x, y)
+
+        # Loss computation
+        loss, ce, prior = model.compute_loss(seq_logits, y, logits_z, energy)
+
+        # Backpropagation
+        loss.backward()
+        optimizer.step()
+
+        running_loss += loss.item()
+        batch_counter += 1
+
+        pbar.set_postfix({
+            'L_Total': f"{loss.item():.2f}",
+            'L_Recon': f"{ce.item():.2f}"
+        })
+
+    epoch_avg_loss = running_loss / max(1, batch_counter)
+    print(f"[LOG] Epoch {epoch} completed. Avg Loss: {epoch_avg_loss:.4f}")
+
+    # Checkpointing
+    ckpt_path = os.path.join(SAVE_DIR, f"qvae_balanced_epoch{epoch}.pth")
+    torch.save(model.state_dict(), ckpt_path)
+    print(f"[CKPT] Model state saved to {ckpt_path}")
+
+print("[DONE] Training pipeline finished successfully.")
\ No newline at end of file
diff --git "a/reproduction_QBM_VAE/\344\273\243\347\240\201\350\257\264\346\230\216\346\226\207\346\241\243.docx" "b/reproduction_QBM_VAE/\344\273\243\347\240\201\350\257\264\346\230\216\346\226\207\346\241\243.docx"
new file mode 100644
index 0000000..bd44340
Binary files /dev/null and "b/reproduction_QBM_VAE/\344\273\243\347\240\201\350\257\264\346\230\216\346\226\207\346\241\243.docx" differ
diff --git "a/reproduction_QBM_VAE/\346\225\260\346\215\256\351\233\206\351\223\276\346\216\245\345\222\214\350\277\220\350\241\214\351\241\272\345\272\217\344\273\245\345\217\212\346\263\250\346\204\217\344\272\213\351\241\271\350\257\264\346\230\216.txt" "b/reproduction_QBM_VAE/\346\225\260\346\215\256\351\233\206\351\223\276\346\216\245\345\222\214\350\277\220\350\241\214\351\241\272\345\272\217\344\273\245\345\217\212\346\263\250\346\204\217\344\272\213\351\241\271\350\257\264\346\230\216.txt"
new file mode 100644
index 0000000..dd5346b
--- /dev/null
+++ "b/reproduction_QBM_VAE/\346\225\260\346\215\256\351\233\206\351\223\276\346\216\245\345\222\214\350\277\220\350\241\214\351\241\272\345\272\217\344\273\245\345\217\212\346\263\250\346\204\217\344\272\213\351\241\271\350\257\264\346\230\216.txt"
@@ -0,0 +1,10 @@
+Run order
+Run the scripts in this order: data_preprocess.py (preprocessing) -> train_peptide.py (training) -> predict.py (inference and evaluation).
+
+Dataset link
+File shared via Baidu Netdisk: Protein
+Link: https://pan.baidu.com/s/1QNWTQjm6ePT_GutppKGfyw?pwd=zzfr  Extraction code: zzfr
+--Shared from a Baidu Netdisk Super Member v4 account
+
+Notes
+Place the dataset and the code in the same directory.
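+
+For reference, a typical run from the repository root looks like the following (assuming a Python 3 environment with torch, numpy and tqdm installed; train_peptide.py additionally imports the kaiwu SDK):
+python data_preprocess.py   # writes train/test shards to ./processed_data_qbm_chunks
+python train_peptide.py     # saves checkpoints to ./models/Peptide_QVAE_Balanced
+python predict.py           # loads the latest checkpoint and prints sample predictions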