Update directed evolution to use the hybrid model

niklases · niklases · commit 43ccba5585fe · 2025-04-22T20:34:24.000+02:00
Variant values are now computed relative to the wild type (WT).
However, negative values are not accepted yet.
diff --git a/pypef/hybrid/hybrid_model.py b/pypef/hybrid/hybrid_model.py
@@ -1471,21 +1471,17 @@ def predict_directed_evolution(
         y_pred = get_delta_e_statistical_model(xs, x_wt)
     else:  # model_type == 'Hybrid': Hybrid model input requires params 
         #from PLMC or GREMLIN model plus optional LLM input
-        print(variant, variant_sequence)
         xs, variant, variant_sequence, *_ = plmc_or_gremlin_encoding(
             variant, variant_sequence, None, encoder, 
             verbose=False, use_global_model=True
         )
-        print(variant_sequence)
         if not list(xs):
             return 'skip'
         if model.llm_model_input is None:
             x_llm = None
         else:
             x_llm = llm_embedder(model.llm_model_input, variant_sequence)
         try:
-            print(np.shape(xs), np.shape(x_llm),  np.atleast_2d(x_llm))
-            #exit()
             y_pred = model.hybrid_prediction(np.atleast_2d(xs), np.atleast_2d(x_llm))[0]
         except ValueError as e:
             raise e  # TODO: Check sequences / mutations
diff --git a/pypef/utils/directed_evolution.py b/pypef/utils/directed_evolution.py
@@ -123,6 +123,7 @@ def __init__(  # Instance attributes
         self.negative = negative
         self.de_step_counter = 0  # DE steps
         self.traj_counter = 0  # Trajectory counter
+        logger.info(f"Directed evolution acceptance \"temperature\": {self.temp}")
 
     def mutate_sequence(
             self,
@@ -216,6 +217,7 @@ def in_silico_de(self):
         y_traj.append(self.y_wt)
         s_traj.append(self.s_wt)
         accepted = 0
+        wt_prediction = None
         logger.info(f"Step 0: WT --> {self.y_wt:.3f}")
         for iteration in range(self.num_iterations):  # num_iterations
             self.de_step_counter = iteration
@@ -248,20 +250,30 @@ def in_silico_de(self):
                 )
 
             else:  # hybrid modeling and prediction
+                if wt_prediction is None:
+                    while wt_prediction is None or wt_prediction == 'skip':
+                        wt_prediction = predict_directed_evolution(
+                            encoder=self.dca_encoder,
+                            variant=self.s_wt[int(new_variant[:-1]) - 1] + new_variant[:-1] + 
+                                self.s_wt[int(new_variant[:-1]) - 1],  # WT, e.g. F17F
+                            variant_sequence=self.s_wt,
+                            hybrid_model_data_pkl=self.model
+                        )
                 predictions = predict_directed_evolution(
                     encoder=self.dca_encoder,
                     variant=self.s_wt[int(new_variant[:-1]) - 1] + new_variant,
                     variant_sequence=new_sequence,
                     hybrid_model_data_pkl=self.model
                 )
+            print(wt_prediction)
             if predictions != 'skip':
                 logger.info(f"Step {self.de_step_counter + 1}: "
-                            f"{self.s_wt[int(new_variant[:-1]) - 1]}{new_variant} --> {predictions[0][0]:.3f}")
+                            f"{self.s_wt[int(new_variant[:-1]) - 1]}{new_variant} --> {predictions[0][0] - wt_prediction[0][0]:.3f}")
             else:  # skip if variant cannot be encoded by DCA-based encoding technique
                 logger.info(f"Step {self.de_step_counter + 1}: "
                             f"{self.s_wt[int(new_variant[:-1]) - 1]}{new_variant} --> {predictions}")
                 continue
-            new_y, new_var = predictions[0][0], predictions[0][1]  # new_var == new_variant nonetheless
+            new_y, new_var = predictions[0][0] - wt_prediction[0][0], predictions[0][1]  # new_var == new_variant nonetheless
             # probability function for trial sequence
             # The lower the fitness (y) of the new variant, the higher are the chances to get excluded
             with warnings.catch_warnings():  # catching Overflow warning
@@ -275,10 +287,13 @@ def in_silico_de(self):
             p = min(1, boltz)
             rand_var = random.random()  # random float between 0 and 1
             if rand_var < p:  # Metropolis-Hastings update selection criterion, else do nothing (do not accept variant)
+                logger.info(f'Accepted variant {new_var} [current evolutionary trajectory: {v_traj}]')
                 v_traj.append(new_var)       # update the variant naming trajectory
                 y_traj.append(new_y)         # update the fitness trajectory records
                 s_traj.append(new_sequence)  # update the sequence trajectory records
                 accepted += 1
+            else: 
+                logger.info(f'Rejected variant {new_var} [current evolutionary trajectory: {v_traj}]')
 
         self.assert_trajectory_sequences(v_traj, s_traj)