diff --git a/reproduction_QBM_VAE/README.md b/reproduction_QBM_VAE/README.md
new file mode 100644
index 0000000..1265614
--- /dev/null
+++ b/reproduction_QBM_VAE/README.md
@@ -0,0 +1 @@
+Code for Issue #70
diff --git a/reproduction_QBM_VAE/data_preprocess.py b/reproduction_QBM_VAE/data_preprocess.py
new file mode 100644
index 0000000..06e1022
--- /dev/null
+++ b/reproduction_QBM_VAE/data_preprocess.py
@@ -0,0 +1,204 @@
+import os
+import pickle
+import numpy as np
+from tqdm import tqdm
+import math
+
+# ================= DATA CONFIGURATION =================
+# Physics / Mass Spectrometry Constants
+MIN_MZ = 50.0
+MAX_MZ = 2500.0
+BIN_SIZE = 0.1
+# Dimension = (2500 - 50) / 0.1 + 1 = 24501
+VECTOR_DIM = int((MAX_MZ - MIN_MZ) / BIN_SIZE) + 1
+
+# Vocabulary (Must match Inference Config)
+AA_VOCAB = {
+    '<PAD>': 0, '<SOS>': 1, '<EOS>': 2,
+    'G': 3, 'A': 4, 'S': 5, 'P': 6, 'V': 7, 'T': 8, 'C': 9, 'L': 10,
+    'I': 11, 'N': 12, 'D': 13, 'Q': 14, 'K': 15, 'E': 16, 'M': 17,
+    'H': 18, 'F': 19, 'R': 20, 'Y': 21, 'W': 22,
+    'M(ox)': 23, 'C(cam)': 24, 'N(deam)': 25, 'Q(deam)': 26
+}
+
+# IO Settings
+CHUNK_SIZE = 20000  # Number of samples per shard
+OUTPUT_DIR = "./processed_data_qbm_chunks"
+RAW_DATA_PATH = "./data/raw_data.pkl"  # Point this to your source file
+
+
+class SpectrumProcessor:
+    """
+    Handles the discretization and normalization of Mass Spectrometry data.
+    """
+
+    @staticmethod
+    def bin_spectrum(mz_array, intensity_array):
+        """
+        Converts raw m/z and intensity arrays into a fixed-dimensional dense vector.
+        """
+        vector = np.zeros(VECTOR_DIM, dtype=np.float32)
+
+        # Normalize intensity (Base Peak Normalization)
+        if len(intensity_array) > 0:
+            max_intensity = np.max(intensity_array)
+            if max_intensity > 0:
+                intensity_array = intensity_array / max_intensity
+
+        # Vectorization / Binning
+        for mz, inten in zip(mz_array, intensity_array):
+            if mz < MIN_MZ or mz >= MAX_MZ:
+                continue
+
+            bin_idx = int((mz - MIN_MZ) / BIN_SIZE)
+            if 0 <= bin_idx < VECTOR_DIM:
+                # Merge peaks falling into the same bin (Max pooling strategy)
+                vector[bin_idx] = max(vector[bin_idx], inten)
+
+        return vector
+
+
+class SequenceTokenizer:
+    """
+    Handles encoding of peptide sequences into integer tokens.
+    """
+
+    @staticmethod
+    def tokenize(sequence):
+        """
+        Wraps the sequence with <SOS> and <EOS> tokens and maps residues to indices.
+        Returns: List[int] of token indices.
+        """
+        tokens = [AA_VOCAB['<SOS>']]
+
+        # Simple parsing logic (can be extended for complex modifications)
+        i = 0
+        n = len(sequence)
+        while i < n:
+            # Check for modifications like M(ox)
+            match = False
+            for mod_len in [7, 6, 5]:  # Try matching longest keys first
+                if i + mod_len <= n:
+                    sub = sequence[i: i + mod_len]
+                    if sub in AA_VOCAB:
+                        tokens.append(AA_VOCAB[sub])
+                        i += mod_len
+                        match = True
+                        break
+
+            if not match:
+                # Single amino acid
+                aa = sequence[i]
+                if aa in AA_VOCAB:
+                    tokens.append(AA_VOCAB[aa])
+                else:
+                    # Unknown AA strategy: skip the residue (alternatively, map it to a dedicated <UNK> token)
+                    pass
+                i += 1
+
+        tokens.append(AA_VOCAB['<EOS>'])
+        return tokens
+
+
+def save_chunk(data, split, part_idx):
+    """Serializes a data shard to disk."""
+    if not data:
+        return
+
+    filename = os.path.join(OUTPUT_DIR, f"{split}_part_{part_idx}.pkl")
+    try:
+        with open(filename, 'wb') as f:
+            pickle.dump(data, f)
+        print(f"[IO] Saved shard: {filename} ({len(data)} samples)")
+    except IOError as e:
+        print(f"[ERROR] Failed to save shard {filename}: {e}")
+
+
+def process_pipeline(raw_source):
+    """
+    Main ETL pipeline: Load -> Transform -> Shard -> Save.
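+
+    Output format (as produced by the code below): each shard is a pickled list of
+    dicts {'x': np.float32 array of shape (VECTOR_DIM,), 'y': List[int] token indices},
+    written to OUTPUT_DIR as {split}_part_{part_idx}.pkl with up to CHUNK_SIZE samples
+    per shard; samples are routed to 'train' or 'test' by a random 90/10 split.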
+ """ + os.makedirs(OUTPUT_DIR, exist_ok=True) + + # Check raw data existence + if not os.path.exists(raw_source): + # Fallback for demonstration if user hasn't configured raw path + print(f"[WARN] Raw data not found at {raw_source}. Generating synthetic dummy data for verification.") + # GENERATE DUMMY DATA (Remove this block in production) + dummy_data = [] + for _ in range(50000): + mz = np.random.uniform(100, 2000, 50) + inten = np.random.uniform(0, 1, 50) + seq = "PEPTIDESEQUENCE" + dummy_data.append({'m/z array': mz, 'intensity array': inten, 'sequence': seq}) + raw_iterator = dummy_data + else: + print(f"[PROC] Loading raw data from {raw_source}...") + with open(raw_source, 'rb') as f: + raw_iterator = pickle.load(f) + + print("[PROC] Starting vectorization and tokenization...") + + train_buffer = [] + test_buffer = [] + + # Split ratio configuration + test_ratio = 0.1 + + processed_count = 0 + train_chunk_idx = 0 + test_chunk_idx = 0 + + for item in tqdm(raw_iterator, desc="Processing"): + try: + # Extract fields (Adjust keys based on your raw data schema) + mz = item.get('m/z array') + inten = item.get('intensity array') + seq = item.get('sequence') + + if mz is None or seq is None: + continue + + # 1. Process Spectrum + x_vec = SpectrumProcessor.bin_spectrum(mz, inten) + + # 2. Process Sequence + y_indices = SequenceTokenizer.tokenize(seq) + + # Validation + if np.sum(x_vec) == 0 or len(y_indices) < 3: + continue + + sample = {'x': x_vec, 'y': y_indices} + + # Train/Test Split + if np.random.rand() < test_ratio: + test_buffer.append(sample) + if len(test_buffer) >= CHUNK_SIZE: + save_chunk(test_buffer, 'test', test_chunk_idx) + test_buffer = [] + test_chunk_idx += 1 + else: + train_buffer.append(sample) + if len(train_buffer) >= CHUNK_SIZE: + save_chunk(train_buffer, 'train', train_chunk_idx) + train_buffer = [] + train_chunk_idx += 1 + + processed_count += 1 + + except Exception as e: + # Fail silently on individual bad samples to keep pipeline running + continue + + # Flush remaining buffers + save_chunk(train_buffer, 'train', train_chunk_idx) + save_chunk(test_buffer, 'test', test_chunk_idx) + + print(f"[DONE] ETL Pipeline complete. Processed {processed_count} valid samples.") + + +if __name__ == "__main__": + # Ensure random seed for reproducibility during split + np.random.seed(42) + process_pipeline(RAW_DATA_PATH) \ No newline at end of file diff --git a/reproduction_QBM_VAE/dataset_loader.py b/reproduction_QBM_VAE/dataset_loader.py new file mode 100644 index 0000000..7a792fe --- /dev/null +++ b/reproduction_QBM_VAE/dataset_loader.py @@ -0,0 +1,86 @@ +import os +import glob +import pickle +import numpy as np +import torch +from torch.utils.data import IterableDataset, DataLoader +from torch.nn.utils.rnn import pad_sequence + + +class ChunkedDataset(IterableDataset): + """ + Implements an IterableDataset for efficient loading of sharded .pkl data files. + Designed to handle large-scale spectral data with limited memory footprint. + """ + + def __init__(self, data_dir, split_name, shuffle=False, max_files=None): + super(ChunkedDataset, self).__init__() + self.data_dir = data_dir + self.split_name = split_name + self.shuffle = shuffle + + # Locate all data shards + pattern = os.path.join(data_dir, f"{split_name}_part_*.pkl") + full_file_list = sorted(glob.glob(pattern)) + + if not full_file_list: + print(f"[WARN] No data shards found in {data_dir} for split '{split_name}'. 
Check preprocessing.") + self.file_list = [] + else: + # File selection strategy + if max_files is not None and max_files > 0: + self.file_list = full_file_list[:max_files] + print( + f"[INFO] Fast-mode active. Loading {len(self.file_list)}/{len(full_file_list)} shards for '{split_name}'.") + else: + self.file_list = full_file_list + print(f"[INFO] Full-mode active. Loading all {len(self.file_list)} shards for '{split_name}'.") + + def __iter__(self): + """Yields batches of (spectrum, sequence) pairs from disk.""" + current_list = list(self.file_list) + if self.shuffle: + np.random.shuffle(current_list) + + for file_path in current_list: + try: + with open(file_path, 'rb') as f: + data_chunk = pickle.load(f) + + # In-memory shuffle for the current chunk + if self.shuffle: + np.random.shuffle(data_chunk) + + for item in data_chunk: + # Feature extraction: Sparse matrix to dense tensor + if hasattr(item['x'], 'toarray'): + x_dense = item['x'].toarray().flatten().astype(np.float32) + else: + x_dense = item['x'].flatten().astype(np.float32) + + x_tensor = torch.from_numpy(x_dense) + y_tensor = torch.tensor(item['y'], dtype=torch.long) + + yield x_tensor, y_tensor + + except IOError as e: + print(f"[ERROR] Failed to read shard {file_path}: {e}") + continue + + +def collate_fn_pad(batch): + """ + Custom collator to handle variable-length peptide sequences. + Pads sequences with 0 () to the maximum length in the batch. + """ + xs, ys = zip(*batch) + xs_stacked = torch.stack(xs) + ys_padded = pad_sequence(ys, batch_first=True, padding_value=0) + return xs_stacked, ys_padded + + +def get_dataloader(data_dir, split_name, batch_size=32, shuffle=False, max_files=None): + """Factory function to instantiate the DataLoader pipeline.""" + dataset = ChunkedDataset(data_dir, split_name, shuffle=shuffle, max_files=max_files) + # pin_memory=False for CPU workloads to avoid overhead + return DataLoader(dataset, batch_size=batch_size, pin_memory=False, collate_fn=collate_fn_pad) \ No newline at end of file diff --git a/reproduction_QBM_VAE/models.py b/reproduction_QBM_VAE/models.py new file mode 100644 index 0000000..e1cfc40 --- /dev/null +++ b/reproduction_QBM_VAE/models.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# SDK Compatibility Layer +try: + from kaiwu.torch_plugin import RestrictedBoltzmannMachine +except ImportError: + print("[WARN] Kaiwu SDK not detected. 
Falling back to Dummy RBM implementation.") + + + class RestrictedBoltzmannMachine(nn.Module): + """Mock RBM for non-quantum environments.""" + + def __init__(self, num_visible, num_hidden, **kwargs): + super().__init__() + self.v_bias = nn.Parameter(torch.zeros(num_visible)) + + def energy(self, z): + return -(z * self.v_bias).sum(dim=1) + + +class PeptideEncoder(nn.Module): + """Maps high-dimensional mass spectra to latent logits.""" + + def __init__(self, input_dim, hidden_dim, latent_dim): + super().__init__() + self.fc1 = nn.Linear(input_dim, 2048) + self.fc2 = nn.Linear(2048, hidden_dim) + self.fc_logits = nn.Linear(hidden_dim, latent_dim) + self.norm1 = nn.LayerNorm(hidden_dim) + + def forward(self, x): + x = F.relu(self.fc1(x)) + x = F.dropout(x, 0.3) + x = F.relu(self.norm1(self.fc2(x))) + return self.fc_logits(x) + + +class PeptideDecoder(nn.Module): + """Reconstructs peptide sequences from latent states using GRU.""" + + def __init__(self, latent_dim, hidden_dim, vocab_size): + super().__init__() + self.latent_to_hidden = nn.Linear(latent_dim, hidden_dim) + self.embedding = nn.Embedding(vocab_size, 128) + self.gru = nn.GRU(128, hidden_dim, batch_first=True) + self.fc_out = nn.Linear(hidden_dim, vocab_size) + + def forward(self, z, target_seq): + hidden = self.latent_to_hidden(z).unsqueeze(0) + # Teacher forcing: Use ground truth previous token as input + dec_input = target_seq[:, :-1] + embedded = self.embedding(dec_input) + output, _ = self.gru(embedded, hidden) + prediction = self.fc_out(output) + return prediction + + +class PeptideQVAE(nn.Module): + """ + Quantum-Bounded Boltzmann Machine Variational Autoencoder (QBM-VAE). + Integrates a quantum-inspired energy-based prior into the VAE latent space. + """ + + def __init__(self, input_dim=24501, hidden_dim=512, latent_dim=64, vocab_size=30, kl_beta=0.001): + super().__init__() + self.latent_dim = latent_dim + self.kl_beta = kl_beta + + self.encoder = PeptideEncoder(input_dim, hidden_dim, latent_dim) + self.decoder = PeptideDecoder(latent_dim, hidden_dim, vocab_size) + + # Quantum Prior Initialization + self.rbm = RestrictedBoltzmannMachine( + num_visible=latent_dim, + num_hidden=latent_dim, + h_range=[-1, 1], + j_range=[-1, 1] + ) + self._debug_flag = False + + def reparameterize(self, logits): + """Bernoulli sampling relaxation (Gumbel-Softmax equivalent strategy).""" + if self.training: + return F.gumbel_softmax(logits, tau=1.0, hard=False) + else: + return (torch.sigmoid(logits) > 0.5).float() + + def compute_energy_safe(self, z): + """Wrapper to handle SDK parameter naming variations dynamically.""" + if hasattr(self.rbm, 'energy'): + return self.rbm.energy(z) + + # Reflection-based parameter discovery for backward compatibility + bias_param = None + possible_names = ['v_bias', 'visible_bias', 'bias_v', 'bv', 'b_v'] + for name in possible_names: + if hasattr(self.rbm, name): + bias_param = getattr(self.rbm, name) + break + + if bias_param is not None: + return -(z * bias_param).sum(dim=1) + + if not self._debug_flag: + print("[WARN] RBM parameter binding failed. 
Energy term set to zero (Dry-run mode).")
+            self._debug_flag = True
+
+        return torch.zeros(z.size(0), device=z.device)
+
+    def forward(self, x, target_seq):
+        logits = self.encoder(x)
+        z = self.reparameterize(logits)
+        seq_logits = self.decoder(z, target_seq)
+        energy = self.compute_energy_safe(z)
+        return seq_logits, z, logits, energy
+
+    def compute_loss(self, seq_logits, target_seq, logits_z, rbm_energy):
+        """
+        Calculates the variational objective:
+        Loss = Reconstruction_Error + beta * (Energy - Entropy)
+        """
+        target = target_seq[:, 1:]
+        # Alignment check
+        min_len = min(seq_logits.size(1), target.size(1))
+        seq_logits = seq_logits[:, :min_len, :]
+        target = target[:, :min_len]
+
+        # 1. Reconstruction Loss (Cross Entropy)
+        ce_loss = F.cross_entropy(
+            seq_logits.reshape(-1, seq_logits.size(-1)),
+            target.reshape(-1),
+            ignore_index=0
+        )
+
+        # 2. Regularization (Variational Free Energy approximation)
+        avg_energy = torch.mean(rbm_energy)
+        probs = torch.sigmoid(logits_z)
+        entropy = -torch.sum(probs * torch.log(probs + 1e-8) +
+                             (1 - probs) * torch.log(1 - probs + 1e-8), dim=1).mean()
+
+        prior_loss = avg_energy - entropy
+        total_loss = ce_loss + self.kl_beta * prior_loss
+
+        return total_loss, ce_loss, prior_loss
\ No newline at end of file
diff --git a/reproduction_QBM_VAE/predict.py b/reproduction_QBM_VAE/predict.py
new file mode 100644
index 0000000..48ea39f
--- /dev/null
+++ b/reproduction_QBM_VAE/predict.py
@@ -0,0 +1,132 @@
+import torch
+import os
+import numpy as np
+from models import PeptideQVAE
+from dataset_loader import get_dataloader
+
+# ================= INFERENCE CONFIG =================
+# Must match training configuration
+INPUT_DIM = 24501
+LATENT_DIM = 64
+HIDDEN_DIM = 512
+VOCAB_SIZE = 30
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+IO_CONFIG = {
+    'data_dir': "./processed_data_qbm_chunks",
+    'model_dir': "./models/Peptide_QVAE_Balanced",
+    'dataset_split': 'train',  # Set 'test' or 'train' for evaluation
+    'batch_size': 5
+}
+
+# Vocabulary Mapping
+AA_VOCAB = {
+    '<PAD>': 0, '<SOS>': 1, '<EOS>': 2,
+    'G': 3, 'A': 4, 'S': 5, 'P': 6, 'V': 7, 'T': 8, 'C': 9, 'L': 10,
+    'I': 11, 'N': 12, 'D': 13, 'Q': 14, 'K': 15, 'E': 16, 'M': 17,
+    'H': 18, 'F': 19, 'R': 20, 'Y': 21, 'W': 22,
+    'M(ox)': 23, 'C(cam)': 24, 'N(deam)': 25, 'Q(deam)': 26
+}
+IDX_TO_AA = {v: k for k, v in AA_VOCAB.items()}
+
+
+def resolve_latest_model(model_dir):
+    """Utility to fetch the latest checkpoint based on epoch numbering."""
+    try:
+        models = sorted([f for f in os.listdir(model_dir) if f.endswith('.pth')],
+                        key=lambda x: int(x.split('epoch')[1].split('.')[0]))
+        return os.path.join(model_dir, models[-1])
+    except Exception as e:
+        print(f"[ERROR] Could not resolve model from {model_dir}: {e}")
+        return None
+
+
+def decode_sequence(indices):
+    """Decodes tensor indices to a peptide string, handling special tokens."""
+    seq = []
+    for idx in indices:
+        idx = idx.item()
+        if idx == 2: break  # <EOS> token
+        if idx not in [0, 1]:  # Skip <PAD>, <SOS>
+            seq.append(IDX_TO_AA.get(idx, '?'))
+    return ''.join(seq)
+
+
+def generate_sequence_greedy(model, x, max_len=50):
+    """
+    Performs greedy decoding to generate peptide sequences.
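+
+    Steps (mirroring the implementation below):
+      1. Encode the spectrum and hard-threshold the sigmoid of the logits into a binary latent code z.
+      2. Initialize the decoder GRU hidden state from z via latent_to_hidden.
+      3. Starting from the <SOS> token (index 1), feed the argmax token back into the
+         decoder at each step until <EOS> (index 2) is emitted or max_len is reached.
+    Returns a list of token-index lists, one per sample in the batch.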
+ """ + model.eval() + with torch.no_grad(): + # Encode + logits = model.encoder(x) + z = (torch.sigmoid(logits) > 0.5).float() # Hard thresholding + + # Decode + hidden = model.decoder.latent_to_hidden(z).unsqueeze(0) + batch_size = x.size(0) + curr_input = torch.tensor([[1]] * batch_size, device=DEVICE) # token + + generated_seqs = [[] for _ in range(batch_size)] + finished = [False] * batch_size + + for _ in range(max_len): + embedded = model.decoder.embedding(curr_input) + output, hidden = model.decoder.gru(embedded, hidden) + pred_token = model.decoder.fc_out(output).argmax(dim=-1) + curr_input = pred_token + + for i in range(batch_size): + token = pred_token[i].item() + if token == 2: finished[i] = True + if not finished[i] and token != 1: + generated_seqs[i].append(token) + if all(finished): break + + return generated_seqs + + +if __name__ == "__main__": + latest_ckpt = resolve_latest_model(IO_CONFIG['model_dir']) + if not latest_ckpt: exit() + + print(f"[INFO] Loading checkpoint: {latest_ckpt}") + + # Data Loader + test_loader = get_dataloader( + IO_CONFIG['data_dir'], + IO_CONFIG['dataset_split'], + batch_size=IO_CONFIG['batch_size'], + max_files=1 + ) + + # Model Setup + model = PeptideQVAE(INPUT_DIM, HIDDEN_DIM, LATENT_DIM, VOCAB_SIZE).to(DEVICE) + model.load_state_dict(torch.load(latest_ckpt, map_location=DEVICE)) + model.eval() + + print("\n[EVAL] Inference Sample Comparison") + print("-" * 110) + print(f"{'Ground Truth Sequence':<40} | {'Predicted Sequence':<40} | {'Match Status'}") + print("-" * 110) + + sample_limit = 10 + processed_count = 0 + + with torch.no_grad(): + for x, y in test_loader: + x = x.to(DEVICE) + pred_indices = generate_sequence_greedy(model, x) + + for i in range(len(y)): + true_str = decode_sequence(y[i]) + pred_str = decode_sequence(torch.tensor(pred_indices[i])) + + status = "[MATCH]" if true_str == pred_str else "[DIFF]" + if len(pred_str) == 0: status = "[EMPTY]" + + print(f"{true_str:<40} | {pred_str:<40} | {status}") + + processed_count += 1 + if processed_count >= sample_limit: break + if processed_count >= sample_limit: break \ No newline at end of file diff --git a/reproduction_QBM_VAE/train_peptide.py b/reproduction_QBM_VAE/train_peptide.py new file mode 100644 index 0000000..f8ebaa2 --- /dev/null +++ b/reproduction_QBM_VAE/train_peptide.py @@ -0,0 +1,92 @@ +import os +import torch +import torch.optim as optim +from tqdm import tqdm +import kaiwu +from dataset_loader import get_dataloader +from models import PeptideQVAE + +# ================= CONFIGURATION ================= +# SDK Credentials +USER_ID = "91850531256946690" +SDK_CODE = "lTj5v0u67gyWsMfXxKAbiJPkkT6w7u" + +# Hyperparameters (Balanced Profile for CPU) +BATCH_SIZE = 32 +LEARNING_RATE = 1e-3 +NUM_EPOCHS = 30 +MAX_FILES = 1 # Shard limit for rapid iteration +HIDDEN_DIM = 512 +LATENT_DIM = 64 + +# IO Paths +DATA_DIR = "./processed_data_qbm_chunks" +SAVE_DIR = "./models/Peptide_QVAE_Balanced" + +# ================= INITIALIZATION ================= +print("[INIT] Initializing Kaiwu Quantum SDK...") +try: + kaiwu.license.init(user_id=USER_ID, sdk_code=SDK_CODE) + print("[INFO] License verified successfully.") +except Exception as e: + print(f"[WARN] License initialization failed: {e}") + +os.makedirs(SAVE_DIR, exist_ok=True) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f"[INIT] Runtime device: {device}") + +# Pipeline Setup +print(f"[DATA] Loading dataset (Limit: {MAX_FILES} shards)...") +train_loader = get_dataloader(DATA_DIR, 'train', 
batch_size=BATCH_SIZE, shuffle=True, max_files=MAX_FILES)
+
+print(f"[MODEL] Building architecture (H={HIDDEN_DIM}, L={LATENT_DIM})...")
+model = PeptideQVAE(
+    input_dim=24501,
+    hidden_dim=HIDDEN_DIM,
+    latent_dim=LATENT_DIM,
+    kl_beta=0.001
+).to(device)
+
+optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+# ================= TRAINING LOOP =================
+print(f"[TRAIN] Starting training loop for {NUM_EPOCHS} epochs.")
+
+for epoch in range(1, NUM_EPOCHS + 1):
+    model.train()
+    running_loss = 0.0
+    batch_counter = 0
+
+    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{NUM_EPOCHS}")
+
+    for x, y in pbar:
+        x, y = x.to(device), y.to(device)
+        optimizer.zero_grad()
+
+        # Forward pass
+        seq_logits, z, logits_z, energy = model(x, y)
+
+        # Loss computation
+        loss, ce, prior = model.compute_loss(seq_logits, y, logits_z, energy)
+
+        # Backpropagation
+        loss.backward()
+        optimizer.step()
+
+        running_loss += loss.item()
+        batch_counter += 1
+
+        pbar.set_postfix({
+            'L_Total': f"{loss.item():.2f}",
+            'L_Recon': f"{ce.item():.2f}"
+        })
+
+    epoch_avg_loss = running_loss / max(1, batch_counter)
+    print(f"[LOG] Epoch {epoch} completed. Avg Loss: {epoch_avg_loss:.4f}")
+
+    # Checkpointing
+    ckpt_path = os.path.join(SAVE_DIR, f"qvae_balanced_epoch{epoch}.pth")
+    torch.save(model.state_dict(), ckpt_path)
+    print(f"[CKPT] Model state saved to {ckpt_path}")
+
+print("[DONE] Training pipeline finished successfully.")
\ No newline at end of file
diff --git "a/reproduction_QBM_VAE/\344\273\243\347\240\201\350\257\264\346\230\216\346\226\207\346\241\243.docx" "b/reproduction_QBM_VAE/\344\273\243\347\240\201\350\257\264\346\230\216\346\226\207\346\241\243.docx"
new file mode 100644
index 0000000..bd44340
Binary files /dev/null and "b/reproduction_QBM_VAE/\344\273\243\347\240\201\350\257\264\346\230\216\346\226\207\346\241\243.docx" differ
diff --git "a/reproduction_QBM_VAE/\346\225\260\346\215\256\351\233\206\351\223\276\346\216\245\345\222\214\350\277\220\350\241\214\351\241\272\345\272\217\344\273\245\345\217\212\346\263\250\346\204\217\344\272\213\351\241\271\350\257\264\346\230\216.txt" "b/reproduction_QBM_VAE/\346\225\260\346\215\256\351\233\206\351\223\276\346\216\245\345\222\214\350\277\220\350\241\214\351\241\272\345\272\217\344\273\245\345\217\212\346\263\250\346\204\217\344\272\213\351\241\271\350\257\264\346\230\216.txt"
new file mode 100644
index 0000000..dd5346b
--- /dev/null
+++ "b/reproduction_QBM_VAE/\346\225\260\346\215\256\351\233\206\351\223\276\346\216\245\345\222\214\350\277\220\350\241\214\351\241\272\345\272\217\344\273\245\345\217\212\346\263\250\346\204\217\344\272\213\351\241\271\350\257\264\346\230\216.txt"
@@ -0,0 +1,10 @@
+Run order
+Run the scripts in this order: data_preprocess.py (preprocessing) -> train_peptide.py (training) -> predict.py (inference and evaluation).
+
+Dataset link
+File shared via Baidu Netdisk: Protein
+Link: https://pan.baidu.com/s/1QNWTQjm6ePT_GutppKGfyw?pwd=zzfr  Extraction code: zzfr
+--Shared from a Baidu Netdisk Super Member v4 account
+
+Notes
+Place the dataset and the code in the same directory.
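+
+For reference, a typical run from the repository root looks like the following (assuming a Python 3 environment with torch, numpy and tqdm installed; train_peptide.py additionally imports the kaiwu SDK):
+python data_preprocess.py   # writes train/test shards to ./processed_data_qbm_chunks
+python train_peptide.py     # saves checkpoints to ./models/Peptide_QVAE_Balanced
+python predict.py           # loads the latest checkpoint and prints sample predictions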