@@ -54,7 +54,11 @@ def objective(trial: optuna.Trial) -> float:
 import numpy as np
 from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search \
     import SimpleCerebrosRandomSearch
-from cerebrosllmutils.llm_utils import prepare_data, InterleavedRoPE
+from cerebrosllmutils.llm_utils import prepare_data, \
+    InterleavedRoPE, \
+    Perplexity, \
+    CerebrosNotGPTConfig, \
+    CerebrosNotGPT
 import pendulum
 from cerebros.units.units import DenseUnit
 from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component \
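The widened import assumes that `cerebrosllmutils.llm_utils` now exports every class this commit deletes from the trial script. A minimal smoke test of that assumption, using only names taken verbatim from the diff:

    from cerebrosllmutils.llm_utils import (
        prepare_data, InterleavedRoPE, Perplexity,
        CerebrosNotGPTConfig, CerebrosNotGPT,
    )
    # Should print the three relocated class names without raising ImportError
    print([c.__name__ for c in (Perplexity, CerebrosNotGPTConfig, CerebrosNotGPT)])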
@@ -583,37 +587,8 @@ def objective(trial: optuna.Trial) -> float:
 
 meta_trial_number = 42  # irrelevant unless in distributed training
 
-# Custom metric: Perplexity:
-
-@tf.keras.utils.register_keras_serializable()
-class Perplexity(tf.keras.metrics.Metric):
-    """
-    Computes perplexity, defined as e^(categorical crossentropy).
-    """
-    def __init__(self, name='perplexity', **kwargs):
-        super().__init__(name=name, **kwargs)
-        self.total_crossentropy = self.add_weight(name='total_crossentropy', initializer='zeros')
-        self.count = self.add_weight(name='count', initializer='zeros')
-
-    def update_state(self, y_true, y_pred, sample_weight=None):
-        # Calculate categorical crossentropy
-        crossentropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
-
-        # Update the running sum of crossentropy and the count of samples
-        self.total_crossentropy.assign_add(tf.reduce_sum(crossentropy))
-        self.count.assign_add(tf.cast(tf.shape(y_true)[0], dtype=tf.float32))
-
-    def result(self):
-        # Compute the average crossentropy
-        average_crossentropy = self.total_crossentropy / self.count
-        # Compute perplexity as e^(average crossentropy)
-        return tf.exp(average_crossentropy)
-
-    def reset_state(self):
-        # Reset the state variables
-        self.total_crossentropy.assign(0.0)
-        self.count.assign(0.0)
 
+# Custom metric: Perplexity:
 perplexity_metric = Perplexity()
 
 cerebros_automl = SimpleCerebrosRandomSearch(
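Because `Perplexity` is defined as e raised to the mean categorical crossentropy, uniform predictions over V classes must score a perplexity of exactly V (each sample contributes a crossentropy of ln V). A quick sanity check, assuming the relocated class keeps the interface shown in the removed block above:

    import tensorflow as tf
    V = 8
    y_true = tf.one_hot([0, 3, 5], depth=V)  # three one-hot targets
    y_pred = tf.fill((3, V), 1.0 / V)        # uniform predictions over V classes
    m = Perplexity()                         # as imported from cerebrosllmutils.llm_utils
    m.update_state(y_true, y_pred)
    print(float(m.result()))                 # expect 8.0 (= V)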
@@ -709,257 +684,9 @@ def reset_state(self):
 print("=" * 50)
 
 
-# Register the config and model wrapper as serializable
-@tf.keras.utils.register_keras_serializable()
-class CerebrosNotGPTConfig:
-    def __init__(self, max_sequence_length=1536, padding_token=None):
-        self.max_sequence_length = max_sequence_length
-        self.padding_token = padding_token
-
-    def get_config(self):
-        return {
-            'max_sequence_length': self.max_sequence_length,
-            'padding_token': self.padding_token
-        }
-
-    @classmethod
-    def from_config(cls, config):
-        return cls(**config)
-
-@tf.keras.utils.register_keras_serializable()
-class CerebrosNotGPT(tf.keras.Model):
-    def __init__(self, config, **kwargs):
-        super().__init__(**kwargs)
-        self.config = config
-        self.max_sequence_length = config.max_sequence_length
-        self.padding_token = config.padding_token
-        # Make self.model = the reconstituted model (constant)
-        self.model = best_model_found  # reconstituted_model
-
-    def get_config(self):
-        return {
-            'config': self.config.get_config()
-        }
-
-    @classmethod
-    def from_config(cls, config):
-        config_obj = CerebrosNotGPTConfig.from_config(config['config'])
-        return cls(config=config_obj)
-
-    @staticmethod
-    def apply_top_k_probs(probs, k):
-        if k is None or k <= 0:
-            return probs
-        # Flatten and argsort for indices
-        sorted_indices = tf.argsort(probs, direction='DESCENDING')
-        keep_indices = sorted_indices[:k]
-        mask = tf.zeros_like(probs, dtype=tf.bool)
-        mask = tf.tensor_scatter_nd_update(mask, tf.reshape(keep_indices, (-1, 1)), tf.ones((k,), dtype=tf.bool))
-        filtered_probs = tf.where(mask, probs, tf.zeros_like(probs))
-        # Renormalize
-        filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)
-        return filtered_probs
-
-    @staticmethod
-    def apply_top_p_probs(probs, p):
-        if p is None or p >= 1.0:
-            return probs
-        sorted_indices = tf.argsort(probs, direction='DESCENDING')
-        sorted_probs = tf.gather(probs, sorted_indices)
-        cumulative_probs = tf.cumsum(sorted_probs)
-        mask = cumulative_probs <= p
-        # Always keep at least 1 token
-        mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)
-        keep_indices = tf.boolean_mask(sorted_indices, mask)
-        filtered_probs = tf.where(tf.reduce_any(tf.equal(tf.range(tf.shape(probs)[0])[:, None], keep_indices), axis=1), probs, tf.zeros_like(probs))
-        # Renormalize
-        filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)
-        return filtered_probs
-
-
-    def generate(self,
-                 token_ids,
-                 do_sample=False,
-                 max_new_tokens=None,
-                 temperature=1.0,
-                 top_k=None,
-                 top_p=None,
-                 frequency_penalty=None,
-                 presence_penalty=None,
-                 repetition_penalty=None):
-        """
-        Generate text autoregressively from token IDs.
-        Applies filtering in sequence: penalties -> temperature -> top-k -> top-p
-        """
-        # Convert token_ids to list if it's not already
-        if not isinstance(token_ids, list):
-            token_ids = list(token_ids)
-
-        # Determine the actual maximum number of new tokens
-        if max_new_tokens is None:
-            max_new_tokens = self.max_sequence_length - len(token_ids)
-        else:
-            max_new_tokens = min(max_new_tokens, self.max_sequence_length - len(token_ids))
-
-        # Initialize the generated tokens list
-        generated_tokens = []
-        current_tokens = token_ids.copy()
-
-        # Autoregressive generation loop
-        for _ in range(max_new_tokens):
-            # Pad or truncate to max_sequence_length
-            if len(current_tokens) > self.max_sequence_length:
-                input_tokens = current_tokens[-self.max_sequence_length:]
-            else:
-                padding_needed = self.max_sequence_length - len(current_tokens)
-                input_tokens = current_tokens + [self.padding_token] * padding_needed
-
-            # Convert to tensor and get model prediction
-            input_tensor = tf.constant([input_tokens], dtype=tf.int32)
-            probs_nested = self.model(input_tensor)
-            probs = probs_nested[0]  # Already softmax probabilities (NOT logits as comment says)
-            logits = tf.math.log(probs + 10 ** -20)  # Convert to logits for penalty application
-
-            if do_sample:
-                # Apply repetition/frequency/presence penalties to logits
-                if frequency_penalty is not None or presence_penalty is not None:
-                    # Collect token counts from current_tokens
-                    token_counts = {}
-                    for t in current_tokens:
-                        token_counts[t] = token_counts.get(t, 0) + 1
-
-                    # Prepare penalty tensor
-                    vocab_size = tf.shape(logits)[0]
-                    penalties = tf.zeros_like(logits)
-
-                    for token_id, count in token_counts.items():
-                        if token_id >= vocab_size:
-                            continue
-                        penalty = 0.0
-                        if presence_penalty is not None:
-                            penalty += presence_penalty
-                        if frequency_penalty is not None:
-                            penalty += frequency_penalty * count
-
-                        penalties = tf.tensor_scatter_nd_add(
-                            penalties,
-                            [[token_id]],
-                            [penalty]
-                        )
-
-                    # Subtract penalties from logits
-                    logits = logits - penalties
-
-                # Apply repetition penalty (standard approach)
-                if repetition_penalty is not None and repetition_penalty != 1.0:
-                    # Collect unique tokens that have appeared
-                    unique_tokens = list(set(current_tokens))
-                    vocab_size = tf.shape(logits)[0]
-
-                    for token_id in unique_tokens:
-                        if token_id < vocab_size:
-                            # Divide logits of repeated tokens by penalty
-                            logits = tf.tensor_scatter_nd_update(
-                                logits,
-                                [[token_id]],
-                                [logits[token_id] / repetition_penalty]
-                            )
-
-                # Apply temperature
-                if temperature != 1.0:
-                    logits = logits / temperature
-
-                # Convert to probabilities
-                probs = tf.nn.softmax(logits)
-
-                # Apply top-k filtering (if specified)
-                if top_k is not None and top_k > 0:
-                    k = min(top_k, tf.shape(probs)[0])
-                    # Get top-k values and indices
-                    top_k_values, top_k_indices = tf.nn.top_k(probs, k=k, sorted=False)
-                    # Create mask for top-k positions
-                    top_k_mask = tf.scatter_nd(
-                        tf.expand_dims(top_k_indices, 1),
-                        tf.ones_like(top_k_values, dtype=tf.bool),
-                        tf.shape(probs)
-                    )
-                    # Zero out non-top-k probabilities
-                    probs = tf.where(top_k_mask, probs, tf.zeros_like(probs))
-                    # Renormalize
-                    probs = probs / tf.reduce_sum(probs)
-                    print(f">>> After top_k: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs")
-
-                # Apply top-p filtering (if specified)
-                if top_p is not None and top_p < 1.0:
-                    # Sort probabilities in descending order
-                    sorted_indices = tf.argsort(probs, direction='DESCENDING')
-                    sorted_probs = tf.gather(probs, sorted_indices)
-                    cumulative_probs = tf.cumsum(sorted_probs)
-                    # Create mask for top-p
-                    mask = cumulative_probs <= top_p
-                    # Always keep at least one token
-                    mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)
-                    # Get indices to keep
-                    keep_indices = tf.boolean_mask(sorted_indices, mask)
-                    # Create mask for original indices
-                    filter_mask = tf.scatter_nd(
-                        tf.expand_dims(keep_indices, 1),
-                        tf.ones_like(keep_indices, dtype=tf.bool),
-                        tf.shape(probs)
-                    )
-                    # Apply mask and renormalize
-                    probs = tf.where(filter_mask, probs, tf.zeros_like(probs))
-                    probs = probs / tf.reduce_sum(probs)
-                    print(f">>> After top_p: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs")
-
-                # Sample from the final filtered distribution
-                # Get non-zero indices and their probabilities
-                non_zero_mask = probs > 1e-8
-                if tf.reduce_any(non_zero_mask):
-                    filtered_indices = tf.where(non_zero_mask)[:, 0]  # Get indices
-                    filtered_probs = tf.boolean_mask(probs, non_zero_mask)  # Get probabilities
-                    # Sample
-                    sampled_local_index = tf.random.categorical(tf.math.log(filtered_probs)[None, :], 1)[0, 0]
-                    # Map back to vocabulary index
-                    next_token_id = int(filtered_indices[sampled_local_index].numpy())
-                else:
-                    # Fallback if all probabilities are zero
-                    warn("Token sampling had to revert to greedy sampling, because no probs had a value > 0, unexpected")
-                    next_token_id = int(tf.argmax(probs, axis=-1).numpy())
-
-            else:
-                # Greedy sampling (argmax) - apply repetition penalty if needed
-                if repetition_penalty is not None and repetition_penalty != 1.0:
-                    unique_tokens = list(set(current_tokens))
-                    vocab_size = tf.shape(logits)[0]
-                    for token_id in unique_tokens:
-                        if token_id < vocab_size:
-                            logits = tf.tensor_scatter_nd_update(
-                                logits,
-                                [[token_id]],
-                                [logits[token_id] / repetition_penalty]
-                            )
-
-                next_token_id = int(tf.argmax(logits, axis=-1).numpy())
-
-            # Check for termination condition
-            if next_token_id == self.padding_token:
-                break
-
-            # Add to generated tokens and update current tokens
-            generated_tokens.append(int(next_token_id))
-            current_tokens.append(int(next_token_id))
-
-            # Check if we've reached max sequence length
-            if len(current_tokens) >= self.max_sequence_length:
-                break
-
-        return token_ids + generated_tokens
 
+
 
-    def call(self, inputs):
-        # This is just for compatibility, the main logic is in generate()
-        return self.model(inputs)
 
 # Replace the generation code block with this:
 
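For reference, a hedged sketch of how the relocated wrapper would be exercised after this commit, based on the constructor and `generate()` signature visible in the removed code. The padding token value, the prompt IDs, and the in-scope `best_model_found` that `CerebrosNotGPT.__init__` captures are stand-ins for objects the surrounding script is expected to provide:

    config = CerebrosNotGPTConfig(max_sequence_length=1536, padding_token=0)  # 0 is illustrative
    not_gpt = CerebrosNotGPT(config=config)   # wraps the best model found by the search
    prompt_ids = [101, 2023, 2003]            # illustrative token IDs from the tokenizer
    output_ids = not_gpt.generate(
        prompt_ids,
        do_sample=True,        # sample instead of greedy argmax
        max_new_tokens=64,
        temperature=0.8,       # applied before top-k / top-p filtering
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.1,
    )
    print(output_ids)          # prompt IDs followed by generated IDs

Generation ends early if the padding token is sampled or the sequence reaches `max_sequence_length`.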