diff --git a/src/ProgramSynthesis/Engines/CodeBERT.cs b/src/ProgramSynthesis/Engines/CodeBERT.cs
new file mode 100644
index 000000000..496df176a
--- /dev/null
+++ b/src/ProgramSynthesis/Engines/CodeBERT.cs
@@ -0,0 +1,398 @@
+using AiDotNet.Interfaces;
+using AiDotNet.LinearAlgebra;
+using AiDotNet.LossFunctions;
+using AiDotNet.Models;
+using AiDotNet.NeuralNetworks;
+using AiDotNet.NeuralNetworks.Helpers;
+using AiDotNet.NeuralNetworks.Layers;
+using AiDotNet.Optimizers;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Interfaces;
+using AiDotNet.ProgramSynthesis.Models;
+
+namespace AiDotNet.ProgramSynthesis.Engines;
+
+/// <summary>
+/// CodeBERT is a bimodal pre-trained model for programming and natural languages.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// CodeBERT is designed to understand both code and natural language. It uses a
+/// transformer-based encoder architecture pre-trained on code-documentation pairs
+/// from GitHub. It excels at tasks like code search, code documentation generation,
+/// and code completion.
+/// </para>
+/// <para><b>For Beginners:</b> CodeBERT is an AI that understands programming languages.
+///
+/// Just like BERT understands English, CodeBERT understands code. It's been trained
+/// on millions of code examples from GitHub and can:
+/// - Understand what code does
+/// - Find similar code
+/// - Complete code as you write
+/// - Generate documentation
+/// - Translate between code and descriptions
+///
+/// Think of it as an AI that's read millions of lines of code and learned the
+/// patterns of good programming, just like you learn language by reading books.
+/// </para>
+/// </remarks>
+public class CodeBERT : NeuralNetworkBase, ICodeModel
+{
+ private readonly CodeSynthesisArchitecture _architecture; // source of the language, sequence-length and vocabulary settings exposed below
+ private readonly IGradientBasedOptimizer, Tensor> _optimizer; // assigned once in the constructor, hence readonly
+
+ /// <summary>
+ /// Gets the target programming language for this model.
+ /// </summary>
+ public ProgramLanguage TargetLanguage => _architecture.TargetLanguage;
+
+ /// <summary>
+ /// Gets the maximum sequence length (in tokens) that the model can process.
+ /// </summary>
+ public int MaxSequenceLength => _architecture.MaxSequenceLength;
+
+ /// <summary>
+ /// Gets the vocabulary size of the model.
+ /// </summary>
+ public int VocabularySize => _architecture.VocabularySize;
+
+ /// <summary>
+ /// Initializes a new instance of the CodeBERT class.
+ /// </summary>
+ /// <param name="architecture">The architecture configuration for the model.</param>
+ /// <param name="lossFunction">Optional loss function (defaults to cross-entropy for code tasks).</param>
+ /// <param name="optimizer">Optional optimizer (defaults to Adam optimizer).</param>
+ /// <remarks>
+ /// <para>
+ /// Creates a new CodeBERT model with the specified architecture. The model will
+ /// be initialized with encoder layers suitable for code understanding tasks.
+ /// </para>
+ /// <para><b>For Beginners:</b> This creates a new CodeBERT model.
+ ///
+ /// You provide:
+ /// - Architecture: The blueprint (size, layers, etc.)
+ /// - Loss function: How to measure mistakes (optional)
+ /// - Optimizer: How to improve from mistakes (optional)
+ ///
+ /// Like setting up a new student with a curriculum and teaching method.
+ /// </para>
+ /// </remarks>
+ public CodeBERT(
+ CodeSynthesisArchitecture architecture,
+ ILossFunction? lossFunction = null,
+ IGradientBasedOptimizer, Tensor>? optimizer = null)
+ : base(architecture, lossFunction ?? new CrossEntropyLoss())
+ {
+ _architecture = architecture;
+ _optimizer = optimizer ?? new AdamOptimizer, Tensor>(this); // default optimizer is constructed around this model instance
+ InitializeLayers(); // overridden in this class; runs after _architecture has been assigned
+ }
+
+ ///
+ /// Initializes the layers of the CodeBERT model.
+ ///
+ ///
+ ///
+ /// Sets up the encoder layers including embeddings, positional encoding,
+ /// multi-head attention, and feed-forward networks based on the architecture.
+ ///
+ /// For Beginners: This builds the internal structure of CodeBERT.
+ ///
+ /// Creates all the layers that process code:
+ /// - Embedding layer: Converts code tokens to numbers
+ /// - Attention layers: Let the model focus on important parts
+ /// - Processing layers: Transform and analyze the code
+ ///
+ /// Like assembling the components of a machine according to the blueprint.
+ ///
+ ///
+ protected override void InitializeLayers()
+ {
+     // Layers supplied through the architecture replace the default stack entirely.
+     var customLayers = Architecture.Layers;
+     if (customLayers != null && customLayers.Count > 0)
+     {
+         Layers.AddRange(customLayers);
+         ValidateCustomLayers(Layers);
+         return;
+     }
+
+     var arch = _architecture;
+
+     // Token embedding; positional encoding follows the architecture flag.
+     Layers.Add(new EmbeddingLayer(
+         vocabularySize: arch.VocabularySize,
+         embeddingDimension: arch.ModelDimension,
+         maxSequenceLength: arch.MaxSequenceLength,
+         usePositionalEncoding: arch.UsePositionalEncoding));
+
+     // One six-layer group per configured encoder layer.
+     for (int block = 0; block < arch.NumEncoderLayers; block++)
+     {
+         // Self-attention, then normalization.
+         Layers.Add(new MultiHeadAttentionLayer(
+             modelDimension: arch.ModelDimension,
+             numHeads: arch.NumHeads,
+             dropout: arch.DropoutRate));
+
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+
+         // Feed-forward pair: ModelDimension -> FeedForwardDimension with GELU, then back with no activation.
+         Layers.Add(new DenseLayer(
+             inputSize: arch.ModelDimension,
+             outputSize: arch.FeedForwardDimension,
+             activationFunction: new GELUActivationFunction()));
+
+         Layers.Add(new DenseLayer(
+             inputSize: arch.FeedForwardDimension,
+             outputSize: arch.ModelDimension,
+             activationFunction: null));
+
+         // Normalization and dropout close the group.
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+
+         Layers.Add(new DropoutLayer(arch.DropoutRate));
+     }
+
+     // Projection from the model dimension to vocabulary size (no activation).
+     Layers.Add(new DenseLayer(
+         inputSize: arch.ModelDimension,
+         outputSize: arch.VocabularySize,
+         activationFunction: null));
+ }
+
+ ///
+ /// Encodes source code into a vector representation.
+ ///
+ /// The source code to encode.
+ /// A tensor representing the encoded code.
+ ///
+ ///
+ /// Converts source code text into a numerical tensor that captures the semantic
+ /// meaning of the code. This encoding can be used for downstream tasks like
+ /// code search or classification.
+ ///
+ /// For Beginners: This converts code text into numbers the AI understands.
+ ///
+ /// Code is just text to a computer, but the AI needs numbers to work with.
+ /// This method:
+ /// 1. Breaks code into tokens (like words)
+ /// 2. Converts tokens to numbers
+ /// 3. Processes them through the model
+ /// 4. Returns a numerical representation that captures the code's meaning
+ ///
+ /// Like translating a recipe into a numerical rating system while keeping the essence.
+ ///
+ ///
+ public Tensor EncodeCode(string code)
+ {
+     // Placeholder tokenization (hashed whitespace tokens) feeds a full forward
+     // pass; whatever the last layer emits is returned unchanged.
+     return Predict(TokenizeCode(code));
+ }
+
+ ///
+ /// Decodes a vector representation back into source code.
+ ///
+ /// The encoded representation to decode.
+ /// The decoded source code as a string.
+ ///
+ ///
+ /// Converts the model's numerical representation back into human-readable code.
+ /// This is the reverse of the encoding process.
+ ///
+ /// For Beginners: This converts the AI's numbers back to readable code.
+ ///
+ /// After the AI processes code as numbers, we need to convert back to text.
+ /// This method reverses the encoding process to produce readable code.
+ ///
+ ///
+ public string DecodeCode(Tensor encoding)
+ {
+     var decoded = DetokenizeCode(encoding); // placeholder detokenizer; no real decoding happens yet
+     return decoded;
+ }
+
+ ///
+ /// Performs a code-related task on the input code.
+ ///
+ /// The source code to process.
+ /// The type of task to perform.
+ /// The result of the task as a string.
+ ///
+ ///
+ /// Executes various code-related tasks such as completion, summarization,
+ /// bug detection, etc. The implementation adapts based on the task type.
+ ///
+ /// For Beginners: This is the main method for doing things with code.
+ ///
+ /// Tell it what you want done (completion, bug finding, etc.), and it
+ /// processes the code and returns the result. Like a Swiss Army knife
+ /// for code - one tool, many functions.
+ ///
+ ///
+ public string PerformTask(string code, CodeTask task)
+ {
+     // Encode once up front; every handler below receives the same tensor.
+     var encoded = EncodeCode(code);
+
+     switch (task)
+     {
+         case CodeTask.Completion: return PerformCompletion(encoded);
+         case CodeTask.Summarization: return PerformSummarization(encoded);
+         case CodeTask.BugDetection: return PerformBugDetection(encoded);
+         // Any other task falls back to plain decoding.
+         default: return DecodeCode(encoded);
+     }
+ }
+
+ ///
+ /// Gets embeddings for code tokens.
+ ///
+ /// The source code to get embeddings for.
+ /// A tensor containing token embeddings.
+ ///
+ ///
+ /// Returns the embedding vectors for each token in the code. These embeddings
+ /// capture semantic similarity - similar code constructs have similar embeddings.
+ ///
+ /// For Beginners: This gets the numerical representation of each code piece.
+ ///
+ /// Each word/symbol in code gets a vector of numbers that represents its meaning.
+ /// Similar code pieces get similar numbers. Useful for finding related code or
+ /// understanding code structure.
+ ///
+ ///
+ public Tensor GetEmbeddings(string code)
+ {
+     var tokenIds = TokenizeCode(code);
+     var firstLayer = Layers[0]; // the embedding layer in the default stack; NOTE(review): custom stacks may start with something else
+     return firstLayer.Forward(tokenIds);
+ }
+
+ /// <summary>
+ /// Makes a prediction on the input tensor.
+ /// </summary>
+ /// <param name="input">The input tensor.</param>
+ /// <returns>The output tensor.</returns>
+ public override Tensor Predict(Tensor input)
+ {
+     // Inference only: switch the network out of training mode first.
+     SetTrainingMode(false);
+     // Thread the tensor through the layers in list order.
+     var current = input;
+     for (int idx = 0; idx < Layers.Count; idx++)
+     {
+         current = Layers[idx].Forward(current);
+     }
+     return current;
+ }
+
+ /// <summary>
+ /// Trains the model on a single example.
+ /// </summary>
+ /// <param name="input">The input tensor.</param>
+ /// <param name="expectedOutput">The expected output tensor.</param>
+ public override void Train(Tensor input, Tensor expectedOutput)
+ {
+     SetTrainingMode(true);
+
+     // Forward pass, run here instead of via Predict(): Predict() begins with
+     // SetTrainingMode(false), which would undo the training mode set above.
+     var output = input;
+     foreach (var layer in Layers)
+     {
+         output = layer.Forward(output);
+     }
+
+     // Record the loss, then seed back-propagation with its gradient.
+     AddLoss(LossFunction.ComputeLoss(output, expectedOutput));
+     var gradient = LossFunction.ComputeGradient(output, expectedOutput);
+     for (int i = Layers.Count - 1; i >= 0; i--)
+     {
+         gradient = Layers[i].Backward(gradient);
+     }
+
+     _optimizer.UpdateParameters(); // apply the update through the configured optimizer
+ }
+
+ /// <summary>
+ /// Gets metadata about the model.
+ /// </summary>
+ /// <returns>Model metadata.</returns>
+ public override ModelMetadata GetModelMetadata() =>
+     // Object-initializer snapshot; TrainingLosses is whatever GetLosses()
+     // returns at the time of the call.
+     new ModelMetadata
+     {
+         ModelType = "CodeBERT",
+         ParameterCount = ParameterCount,
+         InputSize = _architecture.InputSize,
+         OutputSize = _architecture.OutputSize,
+         TrainingLosses = GetLosses()
+     };
+
+ protected override void SerializeNetworkSpecificData(BinaryWriter writer)
+ {
+     var arch = _architecture; // three Int32 fields, in the order the reader consumes them
+     writer.Write((int)arch.TargetLanguage);
+     writer.Write(arch.MaxSequenceLength);
+     writer.Write(arch.VocabularySize);
+ }
+
+ protected override void DeserializeNetworkSpecificData(BinaryReader reader)
+ {
+     // Reads past the three Int32 fields; NOTE(review): the values are discarded, not validated or applied.
+     _ = reader.ReadInt32(); // target language
+     _ = reader.ReadInt32(); // max sequence length
+     _ = reader.ReadInt32(); // vocabulary size
+ }
+
+ // Builds another CodeBERT from the same architecture, loss function and optimizer.
+ // NOTE(review): the optimizer instance is shared with this model - confirm that is intended.
+ protected override IFullModel, Tensor> CreateNewInstance()
+     => new CodeBERT(_architecture, LossFunction, _optimizer);
+
+ // Helper methods for tokenization (simplified implementations)
+ /// <summary>Placeholder tokenizer: splits on spaces, newlines and tabs, keeps at most MaxSequenceLength tokens, and hashes each to an id below VocabularySize.</summary>
+ /// <remarks>Ids come from a 32-bit FNV-1a hash over the token's UTF-16 chars, so they are reproducible from run to run.</remarks>
+ /// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
+ private Tensor TokenizeCode(string code)
+ {
+     var tokens = (code ?? throw new ArgumentNullException(nameof(code))).Split(new[] { ' ', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
+     Array.Resize(ref tokens, Math.Min(tokens.Length, _architecture.MaxSequenceLength)); // truncate to the model limit
+     return Tensor.FromArray(Array.ConvertAll(tokens, token =>
+     {
+         uint hash = 2166136261u; // FNV-1a offset basis; string.GetHashCode() is not guaranteed stable across processes
+         foreach (char c in token) { hash = unchecked((hash ^ (uint)c) * 16777619u); } // FNV-1a prime
+         return (T)Convert.ChangeType((int)(hash % (uint)_architecture.VocabularySize), typeof(T)); // unsigned %, so no Math.Abs(int.MinValue) overflow
+     }));
+ }
+
+ /// <summary>
+ /// Placeholder detokenizer: the tensor is ignored and a fixed marker is returned.
+ /// </summary>
+ private string DetokenizeCode(Tensor encoding)
+     => "// Generated code";
+
+ /// <summary>
+ /// Placeholder for code completion: the tensor is ignored and a fixed marker is returned.
+ /// </summary>
+ private string PerformCompletion(Tensor encoding)
+     => "// Completed code";
+
+ /// <summary>
+ /// Placeholder for code summarization: the tensor is ignored and a fixed marker is returned.
+ /// </summary>
+ private string PerformSummarization(Tensor encoding)
+     => "// Code summary";
+
+ /// <summary>
+ /// Placeholder for bug detection: the tensor is ignored and a fixed marker is returned.
+ /// </summary>
+ private string PerformBugDetection(Tensor encoding)
+     => "// No bugs detected";
+}
diff --git a/src/ProgramSynthesis/Engines/CodeT5.cs b/src/ProgramSynthesis/Engines/CodeT5.cs
new file mode 100644
index 000000000..371a19b8e
--- /dev/null
+++ b/src/ProgramSynthesis/Engines/CodeT5.cs
@@ -0,0 +1,338 @@
+using AiDotNet.Interfaces;
+using AiDotNet.LinearAlgebra;
+using AiDotNet.LossFunctions;
+using AiDotNet.Models;
+using AiDotNet.NeuralNetworks;
+using AiDotNet.NeuralNetworks.Helpers;
+using AiDotNet.NeuralNetworks.Layers;
+using AiDotNet.Optimizers;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Interfaces;
+using AiDotNet.ProgramSynthesis.Models;
+
+namespace AiDotNet.ProgramSynthesis.Engines;
+
+/// <summary>
+/// CodeT5 is an encoder-decoder model for code understanding and generation.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// CodeT5 is based on the T5 (Text-To-Text Transfer Transformer) architecture adapted
+/// for code. It uses an encoder-decoder structure that can handle both code understanding
+/// and generation tasks. It's particularly effective for code translation, summarization,
+/// and generation from natural language descriptions.
+/// </para>
+/// <para><b>For Beginners:</b> CodeT5 can both understand AND generate code.
+///
+/// Unlike CodeBERT which mainly understands code, CodeT5 can also create it:
+/// - Understand: Read and analyze code (encoder)
+/// - Generate: Write new code (decoder)
+///
+/// This makes it powerful for tasks like:
+/// - Translating Python to Java
+/// - Generating code from English descriptions
+/// - Creating documentation from code
+/// - Fixing bugs by rewriting code
+///
+/// Think of it as both a reader and a writer, not just a reader.
+/// </para>
+/// </remarks>
+public class CodeT5 : NeuralNetworkBase, ICodeModel
+{
+ private readonly CodeSynthesisArchitecture _architecture; // source of every configuration value exposed below
+ private readonly IGradientBasedOptimizer, Tensor> _optimizer; // assigned once in the constructor, hence readonly
+
+ public ProgramLanguage TargetLanguage => _architecture.TargetLanguage; // target programming language from the architecture
+ public int MaxSequenceLength => _architecture.MaxSequenceLength; // token limit applied by the tokenizer
+ public int VocabularySize => _architecture.VocabularySize; // modulus for hashed token ids and width of the output projection
+
+ /// <summary>
+ /// Gets the number of encoder layers.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The encoder processes and understands the input code or text.
+ /// </para>
+ /// <para><b>For Beginners:</b> Encoder layers read and understand the input.
+ ///
+ /// These layers analyze and comprehend what you give the model,
+ /// like reading comprehension in school.
+ /// </para>
+ /// </remarks>
+ public int NumEncoderLayers => _architecture.NumEncoderLayers;
+
+ /// <summary>
+ /// Gets the number of decoder layers.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The decoder generates the output code based on the encoder's understanding.
+ /// </para>
+ /// <para><b>For Beginners:</b> Decoder layers write the output.
+ ///
+ /// After understanding the input (encoder), these layers generate
+ /// the response, like writing an essay based on your understanding.
+ /// </para>
+ /// </remarks>
+ public int NumDecoderLayers => _architecture.NumDecoderLayers;
+
+ /// <summary>
+ /// Initializes a new instance of the CodeT5 class.
+ /// </summary>
+ /// <param name="architecture">The architecture configuration.</param>
+ /// <param name="lossFunction">Optional loss function.</param>
+ /// <param name="optimizer">Optional optimizer.</param>
+ /// <remarks>
+ /// <para>
+ /// Creates a new CodeT5 model with encoder-decoder architecture. The model
+ /// can both understand existing code and generate new code.
+ /// </para>
+ /// <para><b>For Beginners:</b> This creates a new CodeT5 model.
+ ///
+ /// CodeT5 needs both encoder and decoder layers, so make sure your
+ /// architecture specifies both (NumEncoderLayers and NumDecoderLayers).
+ /// </para>
+ /// </remarks>
+ public CodeT5(
+ CodeSynthesisArchitecture architecture,
+ ILossFunction? lossFunction = null,
+ IGradientBasedOptimizer, Tensor>? optimizer = null)
+ : base(architecture, lossFunction ?? new CrossEntropyLoss())
+ {
+ _architecture = architecture;
+ _optimizer = optimizer ?? new AdamOptimizer, Tensor>(this); // default optimizer is constructed around this model instance
+
+ if (architecture.NumDecoderLayers == 0) // warn only; construction continues without decoder layers
+ {
+ Console.WriteLine("Warning: CodeT5 works best with decoder layers (NumDecoderLayers > 0)."); // NOTE(review): library code writing to the console
+ }
+
+ InitializeLayers(); // overridden in this class; runs after _architecture has been assigned
+ }
+
+ protected override void InitializeLayers()
+ {
+     // Layers supplied through the architecture replace the default stack entirely.
+     var customLayers = Architecture.Layers;
+     if (customLayers != null && customLayers.Count > 0)
+     {
+         Layers.AddRange(customLayers);
+         ValidateCustomLayers(Layers);
+         return;
+     }
+
+     var arch = _architecture;
+
+     // Shared token embedding; positional encoding follows the architecture flag.
+     Layers.Add(new EmbeddingLayer(
+         vocabularySize: arch.VocabularySize,
+         embeddingDimension: arch.ModelDimension,
+         maxSequenceLength: arch.MaxSequenceLength,
+         usePositionalEncoding: arch.UsePositionalEncoding));
+
+     // Encoder: one six-layer group per configured encoder layer.
+     for (int block = 0; block < arch.NumEncoderLayers; block++)
+     {
+         // Self-attention, then normalization.
+         Layers.Add(new MultiHeadAttentionLayer(
+             modelDimension: arch.ModelDimension,
+             numHeads: arch.NumHeads,
+             dropout: arch.DropoutRate));
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+
+         // Feed-forward pair: ModelDimension -> FeedForwardDimension with GELU, then back with no activation.
+         Layers.Add(new DenseLayer(
+             inputSize: arch.ModelDimension,
+             outputSize: arch.FeedForwardDimension,
+             activationFunction: new GELUActivationFunction()));
+         Layers.Add(new DenseLayer(
+             inputSize: arch.FeedForwardDimension,
+             outputSize: arch.ModelDimension,
+             activationFunction: null));
+
+         // Normalization and dropout close the group.
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+         Layers.Add(new DropoutLayer(arch.DropoutRate));
+     }
+
+     // Decoder: one eight-layer group per configured decoder layer (none when the count is 0).
+     for (int block = 0; block < arch.NumDecoderLayers; block++)
+     {
+         // Decoder self-attention, then normalization.
+         Layers.Add(new MultiHeadAttentionLayer(
+             modelDimension: arch.ModelDimension,
+             numHeads: arch.NumHeads,
+             dropout: arch.DropoutRate));
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+
+         // "Cross-attention" slot: constructed exactly like the self-attention layer above.
+         // NOTE(review): Predict() feeds it only the previous layer's output, not a separate encoder output.
+         Layers.Add(new MultiHeadAttentionLayer(
+             modelDimension: arch.ModelDimension,
+             numHeads: arch.NumHeads,
+             dropout: arch.DropoutRate));
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+
+         // Feed-forward pair, same shape as in the encoder group.
+         Layers.Add(new DenseLayer(
+             inputSize: arch.ModelDimension,
+             outputSize: arch.FeedForwardDimension,
+             activationFunction: new GELUActivationFunction()));
+         Layers.Add(new DenseLayer(
+             inputSize: arch.FeedForwardDimension,
+             outputSize: arch.ModelDimension,
+             activationFunction: null));
+
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+         Layers.Add(new DropoutLayer(arch.DropoutRate));
+     }
+
+     // Projection from the model dimension to vocabulary size (no activation).
+     Layers.Add(new DenseLayer(
+         inputSize: arch.ModelDimension,
+         outputSize: arch.VocabularySize,
+         activationFunction: null));
+ }
+
+ /// <summary>Tokenizes <paramref name="code"/> with the placeholder tokenizer and returns the output of a full forward pass.</summary>
+ /// <param name="code">Source text to encode.</param>
+ /// <returns>The tensor produced by Predict for the tokenized input.</returns>
+ public Tensor EncodeCode(string code)
+     => Predict(TokenizeCode(code));
+
+ /// <summary>Converts an encoded tensor back to text via the placeholder detokenizer.</summary>
+ /// <param name="encoding">Tensor to decode; handed to DetokenizeCode unchanged.</param>
+ public string DecodeCode(Tensor encoding)
+     => DetokenizeCode(encoding);
+
+ /// <summary>Runs <paramref name="task"/> on <paramref name="code"/>: four tasks have placeholder handlers, any other task is encoded then decoded. Throws ArgumentNullException when code is null.</summary>
+ public string PerformTask(string code, CodeTask task)
+ {
+     if (code is null) { throw new ArgumentNullException(nameof(code)); }
+     return task switch
+     {
+         CodeTask.Generation => PerformGeneration(code),
+         CodeTask.Translation => PerformTranslation(code),
+         CodeTask.Summarization => PerformSummarization(code),
+         CodeTask.Refactoring => PerformRefactoring(code),
+         _ => DecodeCode(EncodeCode(code)) // only this branch consumes the encoding, so the forward pass is deferred to it
+     };
+ }
+
+ public Tensor GetEmbeddings(string code)
+ {
+     var tokenIds = TokenizeCode(code);
+     return Layers[0].Forward(tokenIds); // only the first layer runs: the embedding layer in the default stack
+ }
+
+ public override Tensor Predict(Tensor input)
+ {
+     SetTrainingMode(false); // inference: leave training mode before the forward pass
+     var current = input;
+     for (int idx = 0; idx < Layers.Count; idx++)
+     {
+         current = Layers[idx].Forward(current); // layers run in list order
+     }
+     return current;
+ }
+
+ /// <summary>Runs one training step: forward pass in training mode, loss recording, back-propagation, optimizer update.</summary>
+ /// <param name="input">The input tensor.</param><param name="expectedOutput">The expected output tensor.</param>
+ public override void Train(Tensor input, Tensor expectedOutput)
+ {
+     SetTrainingMode(true);
+     // Forward pass is inlined: Predict() starts with SetTrainingMode(false) and would undo the line above.
+     var output = input;
+     foreach (var layer in Layers) { output = layer.Forward(output); }
+
+     AddLoss(LossFunction.ComputeLoss(output, expectedOutput));
+     var gradient = LossFunction.ComputeGradient(output, expectedOutput);
+     for (int i = Layers.Count - 1; i >= 0; i--) { gradient = Layers[i].Backward(gradient); }
+
+     _optimizer.UpdateParameters();
+ }
+
+ public override ModelMetadata GetModelMetadata() =>
+     // Object-initializer snapshot; TrainingLosses is whatever GetLosses()
+     // returns at the time of the call.
+     new ModelMetadata
+     {
+         ModelType = "CodeT5",
+         ParameterCount = ParameterCount,
+         InputSize = _architecture.InputSize,
+         OutputSize = _architecture.OutputSize,
+         TrainingLosses = GetLosses()
+     };
+
+ protected override void SerializeNetworkSpecificData(BinaryWriter writer)
+ {
+     // Five Int32 fields: language, max sequence length, vocabulary size, encoder count, decoder count.
+     foreach (int field in new[] { (int)_architecture.TargetLanguage, _architecture.MaxSequenceLength, _architecture.VocabularySize, _architecture.NumEncoderLayers, _architecture.NumDecoderLayers })
+     {
+         writer.Write(field);
+     }
+ }
+
+ protected override void DeserializeNetworkSpecificData(BinaryReader reader)
+ {
+     _ = reader.ReadInt32(); // target language; NOTE(review): all five values are read and discarded, not validated
+     _ = reader.ReadInt32(); // max sequence length
+     _ = reader.ReadInt32(); // vocabulary size
+     _ = reader.ReadInt32(); // encoder layer count
+     _ = reader.ReadInt32(); // decoder layer count
+ }
+
+ // Builds another CodeT5 from the same architecture, loss function and optimizer.
+ // NOTE(review): the optimizer instance is shared with this model - confirm that is intended.
+ protected override IFullModel, Tensor> CreateNewInstance()
+     => new CodeT5(_architecture, LossFunction, _optimizer);
+
+ // Helper methods
+ /// <summary>Placeholder tokenizer: whitespace-split, truncated to MaxSequenceLength, each token hashed (32-bit FNV-1a) to an id below VocabularySize. Throws ArgumentNullException when code is null.</summary>
+ private Tensor TokenizeCode(string code)
+ {
+     var tokens = (code ?? throw new ArgumentNullException(nameof(code))).Split(new[] { ' ', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
+     Array.Resize(ref tokens, Math.Min(tokens.Length, _architecture.MaxSequenceLength)); // truncate to the model limit
+     return Tensor.FromArray(Array.ConvertAll(tokens, token =>
+     {
+         uint hash = 2166136261u; // FNV-1a offset basis; string.GetHashCode() is not guaranteed stable across processes
+         foreach (char c in token) { hash = unchecked((hash ^ (uint)c) * 16777619u); } // FNV-1a prime
+         return (T)Convert.ChangeType((int)(hash % (uint)_architecture.VocabularySize), typeof(T)); // unsigned %, so no Math.Abs(int.MinValue) overflow
+     }));
+ }
+
+ /// <summary>Placeholder detokenizer.</summary>
+ /// <remarks>The tensor is ignored; a fixed marker string is returned.</remarks>
+ private string DetokenizeCode(Tensor encoding)
+     => "// Generated code from CodeT5";
+
+ /// <summary>
+ /// Placeholder for generation: echoes the description inside a fixed comment template.
+ /// </summary>
+ private string PerformGeneration(string description)
+     => $"// Generated code based on: {description}";
+
+ /// <summary>
+ /// Placeholder for translation: the input is ignored; only the target language is reported.
+ /// </summary>
+ private string PerformTranslation(string code)
+     => $"// Translated code to {_architecture.TargetLanguage}";
+
+ /// <summary>
+ /// Placeholder for summarization: the input is ignored and a fixed marker is returned.
+ /// </summary>
+ private string PerformSummarization(string code)
+     => "// Summary: This code implements...";
+
+ /// <summary>
+ /// Placeholder for refactoring: the input is ignored and a fixed marker is returned.
+ /// </summary>
+ private string PerformRefactoring(string code)
+     => "// Refactored code";
+}
diff --git a/src/ProgramSynthesis/Engines/GraphCodeBERT.cs b/src/ProgramSynthesis/Engines/GraphCodeBERT.cs
new file mode 100644
index 000000000..fbd6d6cc2
--- /dev/null
+++ b/src/ProgramSynthesis/Engines/GraphCodeBERT.cs
@@ -0,0 +1,281 @@
+using AiDotNet.Interfaces;
+using AiDotNet.LinearAlgebra;
+using AiDotNet.LossFunctions;
+using AiDotNet.Models;
+using AiDotNet.NeuralNetworks;
+using AiDotNet.NeuralNetworks.Helpers;
+using AiDotNet.NeuralNetworks.Layers;
+using AiDotNet.Optimizers;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Interfaces;
+using AiDotNet.ProgramSynthesis.Models;
+
+namespace AiDotNet.ProgramSynthesis.Engines;
+
+/// <summary>
+/// GraphCodeBERT extends CodeBERT by incorporating data flow analysis.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// GraphCodeBERT combines source code with data flow information to better understand
+/// code semantics. It uses graph neural networks to model the relationships between
+/// variables, functions, and data dependencies in code.
+/// </para>
+/// <para><b>For Beginners:</b> GraphCodeBERT understands how data flows through code.
+///
+/// While CodeBERT reads code like text, GraphCodeBERT also understands:
+/// - Which variables depend on which others
+/// - How data flows from one function to another
+/// - The relationships and connections in code structure
+///
+/// Think of it like understanding a city:
+/// - CodeBERT sees the streets and buildings (structure)
+/// - GraphCodeBERT also sees how traffic flows and which roads connect (data flow)
+///
+/// This deeper understanding helps with tasks like bug detection and code optimization.
+/// </para>
+/// </remarks>
+public class GraphCodeBERT : NeuralNetworkBase, ICodeModel
+{
+ private readonly CodeSynthesisArchitecture _architecture; // source of every configuration value exposed below
+ private readonly IGradientBasedOptimizer, Tensor> _optimizer; // assigned once in the constructor, hence readonly
+
+ public ProgramLanguage TargetLanguage => _architecture.TargetLanguage; // target programming language from the architecture
+ public int MaxSequenceLength => _architecture.MaxSequenceLength; // token limit applied by the tokenizer
+ public int VocabularySize => _architecture.VocabularySize; // modulus for hashed token ids and width of the output projection
+
+ /// <summary>
+ /// Gets whether this model uses data flow analysis.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// GraphCodeBERT's key differentiator is its use of data flow graphs to
+ /// understand code beyond just sequential structure.
+ /// </para>
+ /// <para><b>For Beginners:</b> This shows whether the model tracks how data moves.
+ ///
+ /// When true, the model doesn't just read code line by line - it builds a map
+ /// of how data flows between different parts of the code, giving deeper understanding.
+ /// </para>
+ /// </remarks>
+ public bool UsesDataFlow => _architecture.UseDataFlow;
+
+ /// <summary>
+ /// Initializes a new instance of the GraphCodeBERT class.
+ /// </summary>
+ /// <param name="architecture">The architecture configuration (should have UseDataFlow=true).</param>
+ /// <param name="lossFunction">Optional loss function.</param>
+ /// <param name="optimizer">Optional optimizer.</param>
+ /// <remarks>
+ /// <para>
+ /// Creates a new GraphCodeBERT model with data flow analysis capabilities.
+ /// The architecture should have UseDataFlow set to true to enable graph-based processing.
+ /// </para>
+ /// <para><b>For Beginners:</b> This creates a new GraphCodeBERT model.
+ ///
+ /// Similar to CodeBERT, but with extra capabilities to understand data flow.
+ /// Make sure the architecture has UseDataFlow enabled to get the full benefit.
+ /// </para>
+ /// </remarks>
+ public GraphCodeBERT(
+ CodeSynthesisArchitecture architecture,
+ ILossFunction? lossFunction = null,
+ IGradientBasedOptimizer, Tensor>? optimizer = null)
+ : base(architecture, lossFunction ?? new CrossEntropyLoss())
+ {
+ _architecture = architecture;
+ _optimizer = optimizer ?? new AdamOptimizer, Tensor>(this); // default optimizer is constructed around this model instance
+
+ if (!architecture.UseDataFlow) // warn only; the graph layer is simply omitted from the default stack
+ {
+ Console.WriteLine("Warning: GraphCodeBERT works best with UseDataFlow=true in architecture."); // NOTE(review): library code writing to the console
+ }
+
+ InitializeLayers(); // overridden in this class; runs after _architecture has been assigned
+ }
+
+ protected override void InitializeLayers()
+ {
+     // Layers supplied through the architecture replace the default stack entirely.
+     var customLayers = Architecture.Layers;
+     if (customLayers != null && customLayers.Count > 0)
+     {
+         Layers.AddRange(customLayers);
+         ValidateCustomLayers(Layers);
+         return;
+     }
+
+     var arch = _architecture;
+
+     // Token embedding; positional encoding follows the architecture flag.
+     Layers.Add(new EmbeddingLayer(
+         vocabularySize: arch.VocabularySize,
+         embeddingDimension: arch.ModelDimension,
+         maxSequenceLength: arch.MaxSequenceLength,
+         usePositionalEncoding: arch.UsePositionalEncoding));
+
+     // Optional graph convolution, present only when data flow is enabled.
+     if (arch.UseDataFlow)
+     {
+         Layers.Add(new GraphConvolutionalLayer(
+             inputFeatures: arch.ModelDimension,
+             outputFeatures: arch.ModelDimension));
+     }
+
+     // One six-layer group (attention, norm, two dense, norm, dropout) per encoder layer.
+     for (int block = 0; block < arch.NumEncoderLayers; block++)
+     {
+         Layers.Add(new MultiHeadAttentionLayer(
+             modelDimension: arch.ModelDimension,
+             numHeads: arch.NumHeads,
+             dropout: arch.DropoutRate));
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+
+         Layers.Add(new DenseLayer(
+             inputSize: arch.ModelDimension,
+             outputSize: arch.FeedForwardDimension,
+             activationFunction: new GELUActivationFunction()));
+         Layers.Add(new DenseLayer(
+             inputSize: arch.FeedForwardDimension,
+             outputSize: arch.ModelDimension,
+             activationFunction: null));
+
+         Layers.Add(new LayerNormalizationLayer(
+             normalizedShape: new[] { arch.ModelDimension }));
+         Layers.Add(new DropoutLayer(arch.DropoutRate));
+     }
+
+     // Projection from the model dimension to vocabulary size (no activation).
+     Layers.Add(new DenseLayer(
+         inputSize: arch.ModelDimension,
+         outputSize: arch.VocabularySize,
+         activationFunction: null));
+ }
+
+ /// <summary>Tokenizes <paramref name="code"/> with the placeholder tokenizer and returns the output of a full forward pass.</summary>
+ /// <param name="code">Source text to encode.</param>
+ /// <returns>The tensor produced by Predict for the tokenized input.</returns>
+ public Tensor EncodeCode(string code)
+     => Predict(TokenizeCode(code));
+
+ /// <summary>Converts an encoded tensor back to text via the placeholder detokenizer.</summary>
+ /// <param name="encoding">Tensor to decode; handed to DetokenizeCode unchanged.</param>
+ public string DecodeCode(Tensor encoding)
+     => DetokenizeCode(encoding);
+
+ public string PerformTask(string code, CodeTask task)
+ {
+     // Encode once; every branch below consumes the same tensor.
+     var encoded = EncodeCode(code);
+     switch (task)
+     {
+         case CodeTask.BugDetection: return PerformBugDetectionWithDataFlow(encoded, code);
+         case CodeTask.Refactoring: return PerformRefactoring(encoded);
+         case CodeTask.Understanding: return PerformCodeUnderstanding(encoded);
+         default: return DecodeCode(encoded);
+     }
+ }
+
+ public Tensor GetEmbeddings(string code)
+ {
+     var tokenIds = TokenizeCode(code);
+     return Layers[0].Forward(tokenIds); // only the first layer runs: the embedding layer in the default stack
+ }
+
+ public override Tensor Predict(Tensor input)
+ {
+     SetTrainingMode(false); // inference: leave training mode before the forward pass
+     var current = input;
+     for (int idx = 0; idx < Layers.Count; idx++)
+     {
+         current = Layers[idx].Forward(current); // layers run in list order
+     }
+     return current;
+ }
+
+ /// <summary>Runs one training step: forward pass in training mode, loss recording, back-propagation, optimizer update.</summary>
+ /// <param name="input">The input tensor.</param><param name="expectedOutput">The expected output tensor.</param>
+ public override void Train(Tensor input, Tensor expectedOutput)
+ {
+     SetTrainingMode(true);
+     // Forward pass is inlined: Predict() starts with SetTrainingMode(false) and would undo the line above.
+     var output = input;
+     foreach (var layer in Layers) { output = layer.Forward(output); }
+
+     AddLoss(LossFunction.ComputeLoss(output, expectedOutput));
+     var gradient = LossFunction.ComputeGradient(output, expectedOutput);
+     for (int i = Layers.Count - 1; i >= 0; i--) { gradient = Layers[i].Backward(gradient); }
+
+     _optimizer.UpdateParameters();
+ }
+
+ public override ModelMetadata GetModelMetadata() =>
+     // Object-initializer snapshot; TrainingLosses is whatever GetLosses()
+     // returns at the time of the call.
+     new ModelMetadata
+     {
+         ModelType = "GraphCodeBERT",
+         ParameterCount = ParameterCount,
+         InputSize = _architecture.InputSize,
+         OutputSize = _architecture.OutputSize,
+         TrainingLosses = GetLosses()
+     };
+
+ protected override void SerializeNetworkSpecificData(BinaryWriter writer)
+ {
+     // Three Int32 fields (language, max sequence length, vocabulary size), then one Boolean.
+     var header = new[] { (int)_architecture.TargetLanguage, _architecture.MaxSequenceLength, _architecture.VocabularySize };
+     foreach (int field in header) { writer.Write(field); }
+     writer.Write(_architecture.UseDataFlow);
+ }
+
+ protected override void DeserializeNetworkSpecificData(BinaryReader reader)
+ {
+     _ = reader.ReadInt32(); // target language; NOTE(review): all four values are read and discarded, not validated
+     _ = reader.ReadInt32(); // max sequence length
+     _ = reader.ReadInt32(); // vocabulary size
+     _ = reader.ReadBoolean(); // data-flow flag
+ }
+
+ // Builds another GraphCodeBERT from the same architecture, loss function and optimizer.
+ // NOTE(review): the optimizer instance is shared with this model - confirm that is intended.
+ protected override IFullModel, Tensor> CreateNewInstance()
+     => new GraphCodeBERT(_architecture, LossFunction, _optimizer);
+
+ // Helper methods
+ /// <summary>Placeholder tokenizer: whitespace-split, truncated to MaxSequenceLength, each token hashed (32-bit FNV-1a) to an id below VocabularySize. Throws ArgumentNullException when code is null.</summary>
+ private Tensor TokenizeCode(string code)
+ {
+     var tokens = (code ?? throw new ArgumentNullException(nameof(code))).Split(new[] { ' ', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
+     Array.Resize(ref tokens, Math.Min(tokens.Length, _architecture.MaxSequenceLength)); // truncate to the model limit
+     return Tensor.FromArray(Array.ConvertAll(tokens, token =>
+     {
+         uint hash = 2166136261u; // FNV-1a offset basis; string.GetHashCode() is not guaranteed stable across processes
+         foreach (char c in token) { hash = unchecked((hash ^ (uint)c) * 16777619u); } // FNV-1a prime
+         return (T)Convert.ChangeType((int)(hash % (uint)_architecture.VocabularySize), typeof(T)); // unsigned %, so no Math.Abs(int.MinValue) overflow
+     }));
+ }
+
+ /// <summary>Placeholder detokenizer.</summary>
+ /// <remarks>The tensor is ignored; a fixed marker string is returned.</remarks>
+ private string DetokenizeCode(Tensor encoding)
+     => "// Generated code with data flow analysis";
+
+ /// <summary>
+ /// Placeholder for data-flow bug detection: both arguments are ignored and a fixed marker is returned.
+ /// </summary>
+ private string PerformBugDetectionWithDataFlow(Tensor encoding, string code)
+     => "// Bug detection with data flow analysis: No issues found";
+
+ /// <summary>Placeholder for refactoring.</summary>
+ /// <remarks>The tensor is ignored; a fixed marker string is returned.</remarks>
+ private string PerformRefactoring(Tensor encoding)
+     => "// Refactored code";
+
+ /// <summary>Placeholder for code understanding.</summary>
+ /// <remarks>The tensor is ignored; a fixed marker string is returned.</remarks>
+ private string PerformCodeUnderstanding(Tensor encoding)
+     => "// Code analysis: This code implements...";
+}
diff --git a/src/ProgramSynthesis/Engines/NeuralProgramSynthesizer.cs b/src/ProgramSynthesis/Engines/NeuralProgramSynthesizer.cs
new file mode 100644
index 000000000..3939c73ad
--- /dev/null
+++ b/src/ProgramSynthesis/Engines/NeuralProgramSynthesizer.cs
@@ -0,0 +1,410 @@
+using AiDotNet.Interfaces;
+using AiDotNet.LinearAlgebra;
+using AiDotNet.LossFunctions;
+using AiDotNet.Models;
+using AiDotNet.NeuralNetworks;
+using AiDotNet.NeuralNetworks.Helpers;
+using AiDotNet.NeuralNetworks.Layers;
+using AiDotNet.Optimizers;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Interfaces;
+using AiDotNet.ProgramSynthesis.Models;
+
+namespace AiDotNet.ProgramSynthesis.Engines;
+
+/// <summary>
+/// Neural network-based program synthesizer that generates programs from specifications.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// NeuralProgramSynthesizer uses deep learning to generate programs from natural language
+/// descriptions, input-output examples, or formal specifications. It employs an encoder-decoder
+/// architecture similar to CodeT5 but optimized for program synthesis tasks.
+/// </para>
+/// <para><b>For Beginners:</b> This AI can write programs for you automatically!
+///
+/// Imagine describing what you want a program to do, or showing examples of
+/// inputs and outputs, and an AI writes the actual code. That's what this does!
+///
+/// You can provide:
+/// - A description: "Write a function that sorts a list of numbers"
+/// - Examples: Input [3,1,2] → Output [1,2,3]
+/// - Or both!
+///
+/// The AI learns from training and generates working code that solves your problem.
+/// It's like having an AI programmer that can code based on your requirements!
+/// </para>
+/// </remarks>
+public class NeuralProgramSynthesizer : NeuralNetworkBase, IProgramSynthesizer
+{
+    // Architecture settings this synthesizer was constructed with.
+    private readonly CodeSynthesisArchitecture _architecture;
+
+    // Optimizer invoked at the end of each Train call; assigned once, in the constructor.
+    private readonly IGradientBasedOptimizer, Tensor> _optimizer;
+
+    // Code model used to encode specifications and decode generated source text.
+    private readonly ICodeModel _codeModel;
+
+    /// <summary>Gets the synthesis approach configured on the architecture.</summary>
+    public SynthesisType SynthesisType => _architecture.SynthesisType;
+
+    /// <summary>Gets the target language configured on the architecture.</summary>
+    public ProgramLanguage TargetLanguage => _architecture.TargetLanguage;
+
+    /// <summary>Gets the maximum program length configured on the architecture.</summary>
+    public int MaxProgramLength => _architecture.MaxProgramLength;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="NeuralProgramSynthesizer{T}"/> class.
+    /// </summary>
+    /// <param name="architecture">The synthesis architecture configuration.</param>
+    /// <param name="codeModel">The underlying code model (CodeT5 recommended). Must not be null.</param>
+    /// <param name="lossFunction">Optional loss function; a cross-entropy loss is used when omitted.</param>
+    /// <param name="optimizer">Optional optimizer; an Adam optimizer created for this instance is used when omitted.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="codeModel"/> is null.</exception>
+    /// <remarks>
+    /// <para>
+    /// Creates a new neural program synthesizer. Uses a code model (like CodeT5)
+    /// as the backbone for understanding requirements and generating code.
+    /// </para>
+    /// <para><b>For Beginners:</b> This sets up the AI program writer.
+    ///
+    /// You need to provide:
+    /// - Architecture: The blueprint for how it works
+    /// - Code model: The brain that understands and generates code (usually CodeT5)
+    /// - Optional: Loss function and optimizer for training
+    ///
+    /// Once set up, you can ask it to write programs for you!
+    /// </para>
+    /// </remarks>
+    public NeuralProgramSynthesizer(
+        CodeSynthesisArchitecture architecture,
+        ICodeModel codeModel,
+        ILossFunction? lossFunction = null,
+        IGradientBasedOptimizer, Tensor>? optimizer = null)
+        : base(architecture, lossFunction ?? new CrossEntropyLoss())
+    {
+        _architecture = architecture;
+
+        // Fail fast: a null model would otherwise only surface as a NullReferenceException
+        // the first time a specification is encoded.
+        _codeModel = codeModel ?? throw new ArgumentNullException(nameof(codeModel));
+
+        _optimizer = optimizer ?? new AdamOptimizer, Tensor>(this);
+        InitializeLayers();
+    }
+
+    /// <summary>
+    /// Populates <c>Layers</c>, either from the architecture's own layer list or with a default stack.
+    /// </summary>
+    /// <remarks>
+    /// When <c>Architecture.Layers</c> is non-null and non-empty it is added as-is. Otherwise the
+    /// default stack is built: one embedding layer with positional encoding, four pairs of
+    /// multi-head attention followed by layer normalization, and a dense projection from the
+    /// model dimension to the vocabulary size with no activation function.
+    /// </remarks>
+    protected override void InitializeLayers()
+    {
+        var suppliedLayers = Architecture.Layers;
+        if (suppliedLayers != null && suppliedLayers.Count > 0)
+        {
+            Layers.AddRange(suppliedLayers);
+            return;
+        }
+
+        const int AttentionBlockCount = 4;
+
+        // Token embedding for the specification text.
+        Layers.Add(new EmbeddingLayer(
+            vocabularySize: _architecture.VocabularySize,
+            embeddingDimension: _architecture.ModelDimension,
+            maxSequenceLength: _architecture.MaxSequenceLength,
+            usePositionalEncoding: true));
+
+        // Attention + normalization pairs.
+        for (int block = 0; block < AttentionBlockCount; block++)
+        {
+            Layers.Add(new MultiHeadAttentionLayer(
+                modelDimension: _architecture.ModelDimension,
+                numHeads: _architecture.NumHeads,
+                dropout: _architecture.DropoutRate));
+
+            Layers.Add(new LayerNormalizationLayer(
+                normalizedShape: new[] { _architecture.ModelDimension }));
+        }
+
+        // Projection back onto the vocabulary.
+        Layers.Add(new DenseLayer(
+            inputSize: _architecture.ModelDimension,
+            outputSize: _architecture.VocabularySize,
+            activationFunction: null));
+    }
+
+    /// <summary>
+    /// Synthesizes a program from the given input specification.
+    /// </summary>
+    /// <param name="input">The input specification containing requirements or examples.</param>
+    /// <returns>
+    /// The synthesized program, with <c>IsValid</c> set by <see cref="ValidateProgram"/> and
+    /// <c>FitnessScore</c> set by <see cref="EvaluateProgram"/> when examples were supplied
+    /// (otherwise it stays 0.0).
+    /// </returns>
+    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
+    /// <remarks>
+    /// <para>
+    /// This is the main synthesis method. It encodes the specification with the code model,
+    /// decodes that encoding into source text, then validates and (optionally) scores the result.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is where the magic happens - it writes code for you!
+    ///
+    /// You provide what you want (description, examples, constraints), and this
+    /// method generates code. The process:
+    /// 1. Understand your requirements
+    /// 2. Generate candidate code
+    /// 3. Validate the code
+    /// 4. Score it against your examples, if you gave any
+    ///
+    /// Like asking an AI chef for a recipe and getting step-by-step instructions!
+    /// </para>
+    /// </remarks>
+    public Program SynthesizeProgram(ProgramInput input)
+    {
+        if (input is null)
+        {
+            throw new ArgumentNullException(nameof(input));
+        }
+
+        // Encode the specification, then let the code model decode it into source text.
+        var encoding = EncodeSpecification(input);
+        var generatedCode = GenerateCodeFromEncoding(encoding, input);
+
+        var program = new Program(
+            sourceCode: generatedCode,
+            language: input.TargetLanguage,
+            isValid: false, // set by ValidateProgram below
+            fitnessScore: 0.0,
+            complexity: EstimateComplexity(generatedCode));
+
+        program.IsValid = ValidateProgram(program);
+
+        // Fitness is only computed when examples are available.
+        if (input.Examples != null && input.Examples.Count > 0)
+        {
+            program.FitnessScore = EvaluateProgram(program, input);
+        }
+
+        return program;
+    }
+
+    /// <summary>
+    /// Runs basic well-formedness checks on a synthesized program.
+    /// </summary>
+    /// <param name="program">The program to validate.</param>
+    /// <returns>True if the program passes every check, false otherwise.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="program"/> is null.</exception>
+    /// <remarks>
+    /// <para>
+    /// A program is rejected when its source is null, empty or whitespace, when its complexity
+    /// exceeds <see cref="MaxProgramLength"/>, or when its source contains the marker text
+    /// "ERROR" or "INVALID". Language-specific parsing is not performed yet.
+    /// </para>
+    /// <para><b>For Beginners:</b> This checks if the generated code looks usable.
+    ///
+    /// Before using generated code, we check:
+    /// - Is there any code at all?
+    /// - Is it within the allowed size?
+    /// - Does it contain obvious failure markers?
+    ///
+    /// Like proofreading an essay before submitting it.
+    /// </para>
+    /// </remarks>
+    public bool ValidateProgram(Program program)
+    {
+        if (program is null)
+        {
+            throw new ArgumentNullException(nameof(program));
+        }
+
+        var source = program.SourceCode;
+        if (string.IsNullOrWhiteSpace(source))
+        {
+            return false;
+        }
+
+        if (program.Complexity > MaxProgramLength)
+        {
+            return false;
+        }
+
+        // Placeholder until language-specific parsers are wired in. No try/catch is needed:
+        // source is known to be non-null here, so Contains cannot throw.
+        return !source.Contains("ERROR") &&
+               !source.Contains("INVALID");
+    }
+
+    /// <summary>
+    /// Evaluates how well a program satisfies the input specification.
+    /// </summary>
+    /// <param name="program">The program to evaluate.</param>
+    /// <param name="testCases">Test cases to evaluate the program against.</param>
+    /// <returns>
+    /// 0.0 when the program is not valid, 0.5 when there are no examples to run, otherwise the
+    /// fraction of examples whose result equals the expected output.
+    /// </returns>
+    /// <remarks>
+    /// <para>
+    /// Each example's input is passed to <c>ExecuteProgram</c> and the result is compared with
+    /// the expected output; the score is the share of examples that match.
+    /// </para>
+    /// <para><b>For Beginners:</b> This grades how well the program works.
+    ///
+    /// Tests the program and gives it a score (like a percentage grade):
+    /// - 1.0 = Perfect! Passes all tests
+    /// - 0.5 = Passes half the tests
+    /// - 0.0 = Doesn't work at all
+    ///
+    /// The score helps us know if the program is good enough or needs improvement.
+    /// </para>
+    /// </remarks>
+    public double EvaluateProgram(Program program, ProgramInput testCases)
+    {
+        if (!program.IsValid)
+        {
+            return 0.0;
+        }
+
+        var examples = testCases.Examples;
+        if (examples == null || examples.Count == 0)
+        {
+            return 0.5; // No tests to run, assume partial fitness
+        }
+
+        int passed = 0;
+        foreach (var (exampleInput, expected) in examples)
+        {
+            if (ExecuteProgram(program, exampleInput) == expected)
+            {
+                passed++;
+            }
+        }
+
+        return (double)passed / examples.Count;
+    }
+
+    /// <summary>
+    /// Refines an existing program to better meet the specification.
+    /// </summary>
+    /// <param name="program">The program to refine.</param>
+    /// <param name="feedback">Feedback or test cases that failed.</param>
+    /// <returns>
+    /// The newly synthesized program when its fitness score is higher than the original's;
+    /// otherwise the original <paramref name="program"/>.
+    /// </returns>
+    /// <remarks>
+    /// <para>
+    /// Builds a new specification whose description embeds the existing source and the feedback
+    /// text, reuses the feedback's examples, test cases and constraints, and synthesizes again.
+    /// </para>
+    /// <para><b>For Beginners:</b> This improves a program based on feedback.
+    ///
+    /// If the first version isn't quite right:
+    /// 1. Look at what went wrong (failed tests)
+    /// 2. Generate an improved version
+    /// 3. Keep whichever version scores better
+    ///
+    /// Like editing a draft based on reviewer comments to make it better.
+    /// </para>
+    /// </remarks>
+    public Program RefineProgram(Program program, ProgramInput feedback)
+    {
+        var candidate = SynthesizeProgram(new ProgramInput
+        {
+            Description = $"Refine this program:\n{program.SourceCode}\n\nFeedback:\n{feedback.Description}",
+            TargetLanguage = program.Language,
+            Examples = feedback.Examples,
+            TestCases = feedback.TestCases,
+            Constraints = feedback.Constraints
+        });
+
+        // Keep the original unless the candidate scored higher.
+        return candidate.FitnessScore <= program.FitnessScore ? program : candidate;
+    }
+
+    /// <summary>
+    /// Runs <paramref name="input"/> forward through every layer with training mode switched off.
+    /// </summary>
+    /// <param name="input">Tensor fed to the first layer.</param>
+    /// <returns>The output of the last layer, or <paramref name="input"/> itself when there are no layers.</returns>
+    public override Tensor Predict(Tensor input)
+    {
+        SetTrainingMode(false);
+
+        var current = input;
+        for (int i = 0; i < Layers.Count; i++)
+        {
+            current = Layers[i].Forward(current);
+        }
+
+        return current;
+    }
+
+    /// <summary>
+    /// Performs one training step: forward pass, loss, backward pass, optimizer update.
+    /// </summary>
+    /// <param name="input">Tensor fed to the first layer.</param>
+    /// <param name="expectedOutput">Target the network output is compared against.</param>
+    /// <remarks>
+    /// The forward pass is run here rather than through <see cref="Predict"/>, because
+    /// <see cref="Predict"/> switches the network to inference mode and would undo the
+    /// <c>SetTrainingMode(true)</c> call before any layer runs. The network is left in training
+    /// mode when this method returns.
+    /// </remarks>
+    public override void Train(Tensor input, Tensor expectedOutput)
+    {
+        SetTrainingMode(true);
+
+        // Forward pass in training mode.
+        var output = input;
+        foreach (var layer in Layers)
+        {
+            output = layer.Forward(output);
+        }
+
+        var loss = LossFunction.ComputeLoss(output, expectedOutput);
+        AddLoss(loss);
+
+        // Backward pass: propagate the loss gradient from the last layer to the first.
+        var gradient = LossFunction.ComputeGradient(output, expectedOutput);
+        for (int i = Layers.Count - 1; i >= 0; i--)
+        {
+            gradient = Layers[i].Backward(gradient);
+        }
+
+        _optimizer.UpdateParameters();
+    }
+
+    /// <summary>
+    /// Builds a metadata object describing this synthesizer.
+    /// </summary>
+    /// <returns>
+    /// Metadata carrying the model type name, the parameter count, the architecture's input and
+    /// output sizes, and the losses returned by <c>GetLosses()</c>.
+    /// </returns>
+    public override ModelMetadata GetModelMetadata() => new ModelMetadata
+    {
+        ModelType = "NeuralProgramSynthesizer",
+        ParameterCount = ParameterCount,
+        InputSize = _architecture.InputSize,
+        OutputSize = _architecture.OutputSize,
+        TrainingLosses = GetLosses()
+    };
+
+    /// <summary>
+    /// Writes the synthesizer-specific architecture settings to <paramref name="writer"/>.
+    /// </summary>
+    /// <param name="writer">Destination for the serialized settings.</param>
+    /// <remarks>
+    /// Three 32-bit integers are written in order: synthesis type, target language (both enums
+    /// cast to int) and maximum program length.
+    /// </remarks>
+    protected override void SerializeNetworkSpecificData(BinaryWriter writer)
+    {
+        var settings = _architecture;
+
+        writer.Write((int)settings.SynthesisType);
+        writer.Write((int)settings.TargetLanguage);
+        writer.Write(settings.MaxProgramLength);
+    }
+
+    /// <summary>
+    /// Consumes the architecture settings written by <c>SerializeNetworkSpecificData</c>.
+    /// </summary>
+    /// <param name="reader">Reader positioned at the start of the network-specific data.</param>
+    /// <remarks>
+    /// The three values are read only to advance the stream past this section. They are not
+    /// applied to this instance; <c>_architecture</c> is read-only and keeps its constructor value.
+    /// </remarks>
+    protected override void DeserializeNetworkSpecificData(BinaryReader reader)
+    {
+        // Order and widths must mirror SerializeNetworkSpecificData exactly.
+        _ = reader.ReadInt32(); // synthesis type (enum stored as int)
+        _ = reader.ReadInt32(); // target language (enum stored as int)
+        _ = reader.ReadInt32(); // maximum program length
+    }
+
+    /// <summary>
+    /// Constructs another synthesizer from this instance's architecture, code model, loss function and optimizer.
+    /// </summary>
+    /// <returns>The newly constructed synthesizer.</returns>
+    /// <remarks>
+    /// NOTE(review): the same <c>_optimizer</c> and <c>_codeModel</c> objects are handed to the new
+    /// instance - confirm that sharing them between two synthesizers is intended.
+    /// </remarks>
+    protected override IFullModel, Tensor> CreateNewInstance()
+        => new NeuralProgramSynthesizer(_architecture, _codeModel, LossFunction, _optimizer);
+
+ // Helper methods
+    /// <summary>
+    /// Flattens a specification into one text block and encodes it with the code model.
+    /// </summary>
+    /// <param name="input">Specification whose description and examples are encoded.</param>
+    /// <returns>The tensor returned by the code model's <c>EncodeCode</c> for the combined text.</returns>
+    /// <remarks>
+    /// The text starts with the description (empty when null) and gains one
+    /// <c>"\nExample: {input} -> {output}"</c> line per example. A <c>StringBuilder</c> is used so
+    /// the cost stays linear in the number of examples.
+    /// </remarks>
+    private Tensor EncodeSpecification(ProgramInput input)
+    {
+        var specText = new System.Text.StringBuilder(input.Description ?? "");
+
+        if (input.Examples != null)
+        {
+            foreach (var (exInput, exOutput) in input.Examples)
+            {
+                specText.Append($"\nExample: {exInput} -> {exOutput}");
+            }
+        }
+
+        return _codeModel.EncodeCode(specText.ToString());
+    }
+
+    /// <summary>
+    /// Decodes an encoded specification into source text using the code model.
+    /// </summary>
+    /// <param name="encoding">Encoded specification to decode.</param>
+    /// <param name="input">
+    /// The originating specification. Currently unused: its constraints are not applied to the
+    /// generated text.
+    /// </param>
+    /// <returns>The text returned by the code model's <c>DecodeCode</c>, unmodified.</returns>
+    private string GenerateCodeFromEncoding(Tensor encoding, ProgramInput input)
+    {
+        // TODO: apply input.Constraints to the generated code once constraint handling exists.
+        // Until then the decoder output is returned as-is.
+        return _codeModel.DecodeCode(encoding);
+    }
+
+    /// <summary>
+    /// Estimates complexity as the number of non-empty lines in <paramref name="code"/>.
+    /// </summary>
+    /// <param name="code">Source text to measure; null or empty yields 0.</param>
+    /// <returns>The count of non-empty lines.</returns>
+    /// <remarks>
+    /// Both <c>'\r'</c> and <c>'\n'</c> are treated as separators so that blank lines in
+    /// CRLF-terminated text are not counted. The array overload of <c>Split</c> is used to match
+    /// the other tokenizing code in this change set.
+    /// </remarks>
+    private static int EstimateComplexity(string code)
+    {
+        if (string.IsNullOrEmpty(code))
+        {
+            return 0;
+        }
+
+        return code.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries).Length;
+    }
+
+    /// <summary>
+    /// Placeholder for program execution: returns the fixed string <c>output</c> and ignores both arguments.
+    /// </summary>
+    /// <param name="program">Program that would be executed; currently unused.</param>
+    /// <param name="input">Input that would be supplied to the program; currently unused.</param>
+    /// <returns>The constant text <c>output</c>.</returns>
+    /// <remarks>
+    /// TODO: in production, run the program in a sandboxed execution environment.
+    /// </remarks>
+    private string ExecuteProgram(Program program, string input)
+        => "output";
+}
diff --git a/src/ProgramSynthesis/Enums/CodeTask.cs b/src/ProgramSynthesis/Enums/CodeTask.cs
new file mode 100644
index 000000000..b6e94be60
--- /dev/null
+++ b/src/ProgramSynthesis/Enums/CodeTask.cs
@@ -0,0 +1,227 @@
+namespace AiDotNet.ProgramSynthesis.Enums;
+
+/// <summary>
+/// Defines the different types of code-related tasks that can be performed.
+/// </summary>
+/// <remarks>
+/// <para>
+/// This enumeration categorizes the various operations that can be performed on code,
+/// from understanding and generation to transformation and quality assurance.
+/// </para>
+/// <para><b>For Beginners:</b> These are different things you might want to do with code.
+///
+/// Just like you can do different things with text (read, write, translate, summarize),
+/// you can do different things with code. This enum lists all the code-related tasks
+/// the system can help with.
+/// </para>
+/// </remarks>
+public enum CodeTask
+{
+    /// <summary>
+    /// Code completion task - suggesting how to complete partial code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code completion predicts and suggests the next tokens or statements based on
+    /// the existing code context. Similar to autocomplete in text editors.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code completion is like autocomplete for programming.
+    ///
+    /// When you start typing code, the system suggests how to complete it, just like
+    /// your phone suggests words when you're texting. This saves time and reduces errors.
+    /// </para>
+    /// </remarks>
+    Completion = 0,
+
+    /// <summary>
+    /// Code generation task - creating new code from specifications or descriptions.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code generation creates complete code implementations from high-level descriptions,
+    /// requirements, or examples. This can range from single functions to entire programs.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code generation creates code from descriptions.
+    ///
+    /// You describe what you want in plain English (or provide examples), and the system
+    /// writes the code for you. Like asking a chef to make a dish from a description.
+    /// </para>
+    /// </remarks>
+    Generation = 1,
+
+    /// <summary>
+    /// Code translation task - converting code from one language to another.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code translation transforms programs written in one programming language into
+    /// equivalent programs in another language, preserving functionality.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code translation converts code between languages.
+    ///
+    /// Like translating a book from English to Spanish, this converts code from one
+    /// programming language to another (like Python to Java) while keeping the same functionality.
+    /// </para>
+    /// </remarks>
+    Translation = 2,
+
+    /// <summary>
+    /// Code summarization task - generating natural language descriptions of code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code summarization creates concise natural language descriptions that explain
+    /// what a piece of code does, helping with documentation and code understanding.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code summarization explains what code does in plain English.
+    ///
+    /// It reads code and writes a human-readable description of what the code does,
+    /// like creating a book summary from the full text.
+    /// </para>
+    /// </remarks>
+    Summarization = 3,
+
+    /// <summary>
+    /// Bug detection task - identifying potential errors and issues in code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Bug detection analyzes code to find errors, vulnerabilities, and potential issues
+    /// that could cause the program to fail or behave incorrectly.
+    /// </para>
+    /// <para><b>For Beginners:</b> Bug detection finds mistakes in code.
+    ///
+    /// Like proofreading a document, this examines code to find errors before they
+    /// cause problems. It can catch typos, logic errors, and security vulnerabilities.
+    /// </para>
+    /// </remarks>
+    BugDetection = 4,
+
+    /// <summary>
+    /// Bug fixing task - automatically repairing identified bugs in code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Bug fixing not only identifies bugs but also suggests or automatically applies
+    /// corrections to fix the identified issues.
+    /// </para>
+    /// <para><b>For Beginners:</b> Bug fixing automatically corrects errors in code.
+    ///
+    /// After finding bugs, this goes a step further and actually fixes them, like
+    /// spell-check that not only finds typos but corrects them too.
+    /// </para>
+    /// </remarks>
+    BugFixing = 5,
+
+    /// <summary>
+    /// Code refactoring task - improving code structure without changing functionality.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code refactoring restructures existing code to improve readability, maintainability,
+    /// or performance while preserving its external behavior.
+    /// </para>
+    /// <para><b>For Beginners:</b> Refactoring makes code better without changing what it does.
+    ///
+    /// Like reorganizing a messy room - everything stays the same but becomes easier to
+    /// find and use. Makes code cleaner, easier to understand, and easier to modify later.
+    /// </para>
+    /// </remarks>
+    Refactoring = 6,
+
+    /// <summary>
+    /// Code understanding task - analyzing and comprehending code semantics.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code understanding involves analyzing code to extract semantic information,
+    /// identify patterns, understand control flow, and grasp the program's logic.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code understanding means figuring out what code does.
+    ///
+    /// This involves reading and analyzing code to understand its purpose, how it works,
+    /// and what it accomplishes. Like reading comprehension for programming.
+    /// </para>
+    /// </remarks>
+    Understanding = 7,
+
+    /// <summary>
+    /// Test generation task - automatically creating test cases for code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Test generation creates test cases that verify the correctness of code by
+    /// checking various inputs and expected outputs.
+    /// </para>
+    /// <para><b>For Beginners:</b> Test generation creates checks to verify code works correctly.
+    ///
+    /// It automatically writes tests that check if your code does what it's supposed to do.
+    /// Like creating a checklist to make sure all features of a product work correctly.
+    /// </para>
+    /// </remarks>
+    TestGeneration = 8,
+
+    /// <summary>
+    /// Code documentation task - generating documentation for code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code documentation creates explanatory comments and documentation that describe
+    /// what code does, how to use it, and important implementation details.
+    /// </para>
+    /// <para><b>For Beginners:</b> Documentation creates guides and explanations for code.
+    ///
+    /// It generates comments, user guides, and API documentation that explain how to use
+    /// the code. Like writing an instruction manual for a product.
+    /// </para>
+    /// </remarks>
+    Documentation = 9,
+
+    /// <summary>
+    /// Code search task - finding relevant code based on queries.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code search finds relevant code snippets or functions based on natural language
+    /// queries or code patterns, helping developers find reusable code.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code search finds code that does what you need.
+    ///
+    /// You describe what you're looking for (like "function to sort a list"), and it
+    /// finds existing code that does that. Like a search engine for code.
+    /// </para>
+    /// </remarks>
+    Search = 10,
+
+    /// <summary>
+    /// Clone detection task - identifying duplicate or similar code.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Clone detection finds instances of duplicated or highly similar code, which can
+    /// indicate opportunities for refactoring or potential plagiarism.
+    /// </para>
+    /// <para><b>For Beginners:</b> Clone detection finds copied or repeated code.
+    ///
+    /// It identifies places where the same or very similar code appears multiple times,
+    /// which often means the code could be simplified by reusing one version.
+    /// </para>
+    /// </remarks>
+    CloneDetection = 11,
+
+    /// <summary>
+    /// Code review task - analyzing code quality and suggesting improvements.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Code review evaluates code for quality, adherence to best practices, potential
+    /// issues, and suggests improvements or changes.
+    /// </para>
+    /// <para><b>For Beginners:</b> Code review checks code quality and suggests improvements.
+    ///
+    /// Like having an experienced programmer review your code, this examines your code
+    /// for problems, style issues, and opportunities to make it better.
+    /// </para>
+    /// </remarks>
+    CodeReview = 12
+}
diff --git a/src/ProgramSynthesis/Enums/ProgramLanguage.cs b/src/ProgramSynthesis/Enums/ProgramLanguage.cs
new file mode 100644
index 000000000..d606ebb4b
--- /dev/null
+++ b/src/ProgramSynthesis/Enums/ProgramLanguage.cs
@@ -0,0 +1,199 @@
+namespace AiDotNet.ProgramSynthesis.Enums;
+
+/// <summary>
+/// Defines the programming languages that can be synthesized or processed.
+/// </summary>
+/// <remarks>
+/// <para>
+/// This enumeration specifies the target programming languages for code synthesis,
+/// translation, and analysis tasks. Member values are assigned explicitly because models
+/// serialize this enum as an integer; existing values must not be renumbered.
+/// </para>
+/// <para><b>For Beginners:</b> This lists the different programming languages the system can work with.
+///
+/// Just like human languages (English, Spanish, French), there are many programming languages
+/// (Python, C#, Java). Each has its own rules and is better suited for different tasks.
+/// This enum helps the system know which language you want to work with.
+/// </para>
+/// </remarks>
+public enum ProgramLanguage
+{
+    /// <summary>
+    /// Python programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Python is a high-level, interpreted language known for its readability and extensive
+    /// ecosystem. It's widely used in data science, machine learning, web development,
+    /// and automation.
+    /// </para>
+    /// <para><b>For Beginners:</b> Python is known for being easy to read and beginner-friendly.
+    ///
+    /// It's popular for AI, data analysis, and general programming. Code looks clean and
+    /// is relatively easy to understand, making it a great choice for many applications.
+    /// </para>
+    /// </remarks>
+    Python = 0,
+
+    /// <summary>
+    /// C# programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// C# is a modern, object-oriented language developed by Microsoft. It's used for
+    /// Windows applications, game development (Unity), web services, and enterprise software.
+    /// </para>
+    /// <para><b>For Beginners:</b> C# is a powerful language used for many types of applications.
+    ///
+    /// It's particularly popular for Windows programs, games (especially with Unity),
+    /// and business applications. It has strong typing which helps catch errors early.
+    /// </para>
+    /// </remarks>
+    CSharp = 1,
+
+    /// <summary>
+    /// Java programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Java is a widely-used, object-oriented language known for its "write once, run anywhere"
+    /// philosophy. It's popular for enterprise applications, Android development, and large-scale systems.
+    /// </para>
+    /// <para><b>For Beginners:</b> Java is one of the most popular languages in the world.
+    ///
+    /// It's used for Android apps, large business systems, and web applications. Code written
+    /// in Java can run on different types of computers without modification.
+    /// </para>
+    /// </remarks>
+    Java = 2,
+
+    /// <summary>
+    /// JavaScript programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// JavaScript is the primary language for web browser programming and has expanded to
+    /// server-side development (Node.js). It's essential for interactive web applications
+    /// and is one of the most widely used languages.
+    /// </para>
+    /// <para><b>For Beginners:</b> JavaScript makes websites interactive and dynamic.
+    ///
+    /// It runs in web browsers and powers most of the interactive features you see on websites.
+    /// It's also used for server-side programming and mobile app development.
+    /// </para>
+    /// </remarks>
+    JavaScript = 3,
+
+    /// <summary>
+    /// TypeScript programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// TypeScript is a superset of JavaScript that adds static typing. It helps catch
+    /// errors during development and is increasingly popular for large JavaScript applications.
+    /// </para>
+    /// <para><b>For Beginners:</b> TypeScript is JavaScript with extra type checking.
+    ///
+    /// It helps prevent bugs by checking your code before it runs. Think of it as JavaScript
+    /// with helpful guardrails that catch mistakes early.
+    /// </para>
+    /// </remarks>
+    TypeScript = 4,
+
+    /// <summary>
+    /// C++ programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// C++ is a powerful, high-performance language used for system software, game engines,
+    /// and applications where speed is critical. It provides low-level control while supporting
+    /// high-level abstractions.
+    /// </para>
+    /// <para><b>For Beginners:</b> C++ is known for speed and control over computer resources.
+    ///
+    /// It's used when performance is critical, like in game engines, operating systems,
+    /// and high-frequency trading systems. It's more complex but very powerful.
+    /// </para>
+    /// </remarks>
+    CPlusPlus = 5,
+
+    /// <summary>
+    /// C programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// C is a low-level language that provides fine-grained control over computer resources.
+    /// It's used for operating systems, embedded systems, and performance-critical applications.
+    /// </para>
+    /// <para><b>For Beginners:</b> C is a foundational language that's close to how computers work.
+    ///
+    /// Many other languages are based on C. It's used for operating systems and programs
+    /// that need direct control over computer hardware.
+    /// </para>
+    /// </remarks>
+    C = 6,
+
+    /// <summary>
+    /// Go (Golang) programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Go is a modern language designed at Google for building scalable network services
+    /// and concurrent applications. It emphasizes simplicity and has built-in concurrency support.
+    /// </para>
+    /// <para><b>For Beginners:</b> Go is designed for building fast, reliable network services.
+    ///
+    /// It's simpler than some languages but still powerful, especially good for programs
+    /// that need to do many things at once (like web servers handling many users).
+    /// </para>
+    /// </remarks>
+    Go = 7,
+
+    /// <summary>
+    /// Rust programming language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Rust is a systems programming language focused on safety, concurrency, and performance.
+    /// It prevents many common bugs through its unique ownership system.
+    /// </para>
+    /// <para><b>For Beginners:</b> Rust helps you write safe and fast programs.
+    ///
+    /// It has special rules that prevent common programming errors (like memory bugs)
+    /// while still being very fast. Popular for system programming and security-critical applications.
+    /// </para>
+    /// </remarks>
+    Rust = 8,
+
+    /// <summary>
+    /// SQL (Structured Query Language) for database operations.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// SQL is a domain-specific language for managing and querying relational databases.
+    /// It's essential for data manipulation and retrieval in database systems.
+    /// </para>
+    /// <para><b>For Beginners:</b> SQL is for working with databases.
+    ///
+    /// It's not a general programming language but a specialized language for storing,
+    /// retrieving, and managing data in databases. Used everywhere data is stored.
+    /// </para>
+    /// </remarks>
+    SQL = 9,
+
+    /// <summary>
+    /// Generic or language-agnostic representation.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// This option is used when working with abstract program representations that aren't
+    /// tied to a specific programming language, or when the language is not yet determined.
+    /// </para>
+    /// <para><b>For Beginners:</b> Generic means not specific to any one language.
+    ///
+    /// Sometimes you want to work with the logic of a program without worrying about
+    /// which language it will eventually be written in. This option represents that.
+    /// </para>
+    /// </remarks>
+    Generic = 10
+}
diff --git a/src/ProgramSynthesis/Enums/SynthesisType.cs b/src/ProgramSynthesis/Enums/SynthesisType.cs
new file mode 100644
index 000000000..58f6b698d
--- /dev/null
+++ b/src/ProgramSynthesis/Enums/SynthesisType.cs
@@ -0,0 +1,128 @@
+namespace AiDotNet.ProgramSynthesis.Enums;
+
+/// <summary>
+/// Defines the different types of program synthesis approaches available.
+/// </summary>
+/// <remarks>
+/// <para>
+/// This enumeration categorizes the methodologies used for automated program synthesis. Values
+/// are explicit because models serialize this enum as an integer; do not renumber members.
+/// </para>
+/// <para><b>For Beginners:</b> Think of these as different strategies for automatically creating programs.
+///
+/// Just like there are different approaches to solving a puzzle (looking at the picture, starting
+/// from corners, sorting by color), there are different ways to automatically generate code:
+/// - Neural: Uses neural networks that learn from examples
+/// - Symbolic: Uses logical rules and grammar
+/// - Hybrid: Combines neural and symbolic approaches
+/// - GeneticProgramming: Evolves programs through selection and mutation
+/// </para>
+/// </remarks>
+public enum SynthesisType
+{
+    /// <summary>
+    /// Neural network-based program synthesis using deep learning models.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Neural synthesis uses trained neural networks to generate programs by learning patterns
+    /// from a large corpus of existing code. This approach is data-driven and can produce
+    /// creative solutions but may lack guarantees of correctness.
+    /// </para>
+    /// <para><b>For Beginners:</b> Neural synthesis is like learning to code by studying lots of examples.
+    ///
+    /// The AI looks at thousands of code examples and learns patterns, then generates new code
+    /// based on what it has learned. Similar to how you might learn to write by reading many books.
+    /// </para>
+    /// </remarks>
+    Neural = 0,
+
+    /// <summary>
+    /// Symbolic program synthesis using formal logic, grammars, and search algorithms.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Symbolic synthesis uses formal methods, programming language grammars, and logical
+    /// constraints to systematically explore the space of possible programs. This approach
+    /// provides stronger correctness guarantees but may be limited in creativity.
+    /// </para>
+    /// <para><b>For Beginners:</b> Symbolic synthesis is like following a recipe or instruction manual.
+    ///
+    /// It uses strict rules about what code should look like and systematically tries different
+    /// combinations until it finds one that works. Like solving a math problem step by step.
+    /// </para>
+    /// </remarks>
+    Symbolic = 1,
+
+    /// <summary>
+    /// Hybrid approach combining both neural and symbolic techniques.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Hybrid synthesis combines the strengths of both neural and symbolic approaches,
+    /// using neural networks for creative exploration and symbolic methods for verification
+    /// and constraint satisfaction.
+    /// </para>
+    /// <para><b>For Beginners:</b> Hybrid synthesis combines the best of both worlds.
+    ///
+    /// It uses neural networks to come up with creative ideas quickly, then uses symbolic
+    /// methods to check and refine them. Like brainstorming ideas (neural) then fact-checking them (symbolic).
+    /// </para>
+    /// </remarks>
+    Hybrid = 2,
+
+    /// <summary>
+    /// Genetic programming approach using evolutionary algorithms.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Genetic programming evolves programs through processes inspired by biological evolution,
+    /// including selection, crossover (combining parts of programs), and mutation (random changes).
+    /// Programs that perform better are more likely to survive and reproduce.
+    /// </para>
+    /// <para><b>For Beginners:</b> Genetic programming is like evolution in nature.
+    ///
+    /// It creates a population of random programs, tests them, keeps the best ones, and
+    /// creates new programs by mixing and mutating the good ones. Over many generations,
+    /// the programs get better and better, like species evolving over time.
+    /// </para>
+    /// </remarks>
+    GeneticProgramming = 3,
+
+    /// <summary>
+    /// Inductive program synthesis that learns from input-output examples.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Inductive synthesis generates programs by generalizing from a set of input-output
+    /// examples. This is particularly useful when users can provide examples of desired
+    /// behavior but may not know how to express the logic formally.
+    /// </para>
+    /// <para><b>For Beginners:</b> Inductive synthesis learns from examples of what you want.
+    ///
+    /// Instead of telling the computer exactly what to do, you show it examples:
+    /// "When input is [1,2,3], output should be 6"
+    /// "When input is [4,5], output should be 9"
+    /// The system figures out you want it to sum the numbers.
+    /// </para>
+    /// </remarks>
+    Inductive = 4,
+
+    /// <summary>
+    /// Deductive program synthesis from formal specifications.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Deductive synthesis constructs programs from formal specifications that precisely
+    /// describe the desired behavior. This approach provides strong correctness guarantees
+    /// but requires users to provide detailed formal specifications.
+    /// </para>
+    /// <para><b>For Beginners:</b> Deductive synthesis works from precise descriptions.
+    ///
+    /// You provide a detailed specification of exactly what the program should do using
+    /// mathematical logic or formal notation, and the system constructs a program that
+    /// provably meets that specification. Like building from detailed blueprints.
+    /// </para>
+    /// </remarks>
+    Deductive = 5
+}
diff --git a/src/ProgramSynthesis/Interfaces/ICodeModel.cs b/src/ProgramSynthesis/Interfaces/ICodeModel.cs
new file mode 100644
index 000000000..79d033229
--- /dev/null
+++ b/src/ProgramSynthesis/Interfaces/ICodeModel.cs
@@ -0,0 +1,161 @@
+using AiDotNet.Interfaces;
+using AiDotNet.LinearAlgebra;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Models;
+
+namespace AiDotNet.ProgramSynthesis.Interfaces;
+
+/// <summary>
+/// Represents a code understanding model capable of processing and analyzing source code.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// ICodeModel defines the interface for models that can understand, encode, and analyze
+/// source code. These models are typically pre-trained on large corpora of code and can
+/// perform tasks like code completion, bug detection, and code summarization.
+/// </para>
+/// <para><b>For Beginners:</b> A code model is like an AI that understands programming.
+///
+/// Just as language models understand human languages, code models understand programming
+/// languages. They can:
+/// - Read and comprehend code
+/// - Suggest completions while you're writing
+/// - Find bugs and issues
+/// - Explain what code does
+/// - Translate between programming languages
+///
+/// This interface defines what capabilities a code model should have.
+/// </para>
+/// </remarks>
+public interface ICodeModel : IFullModel, Tensor>
+{
+ /// <summary>
+ /// Gets the target programming language for this model.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Specifies which programming language this model is designed to work with.
+ /// Some models are language-specific, while others can work with multiple languages.
+ /// </para>
+ /// <para><b>For Beginners:</b> This tells you which programming language the model knows.
+ ///
+ /// Like a translator who specializes in French or Spanish, code models often specialize
+ /// in specific programming languages like Python or Java.
+ /// </para>
+ /// </remarks>
+ ProgramLanguage TargetLanguage { get; }
+
+ /// <summary>
+ /// Gets the maximum sequence length (in tokens) that the model can process.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Code models process code as sequences of tokens. This property specifies the
+ /// maximum number of tokens the model can handle at once.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is like the maximum length of code the model can read at once.
+ ///
+ /// Code is broken into pieces called "tokens" (like words in a sentence). This number
+ /// tells you the maximum number of tokens the model can process, which roughly
+ /// corresponds to how long a code file can be.
+ /// </para>
+ /// </remarks>
+ int MaxSequenceLength { get; }
+
+ /// <summary>
+ /// Gets the vocabulary size of the model.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The vocabulary consists of all the tokens (keywords, operators, identifiers, etc.)
+ /// that the model knows and can work with.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is like the model's dictionary size.
+ ///
+ /// It tells you how many different code tokens the model knows. A larger vocabulary
+ /// means the model can handle more diverse code patterns and identifiers.
+ /// </para>
+ /// </remarks>
+ int VocabularySize { get; }
+
+ /// <summary>
+ /// Encodes source code into a vector representation.
+ /// </summary>
+ /// <param name="code">The source code to encode.</param>
+ /// <returns>A tensor representing the encoded code.</returns>
+ /// <remarks>
+ /// <para>
+ /// Encoding transforms source code (text) into a numerical representation that
+ /// the model can process. This representation captures semantic information about the code.
+ /// </para>
+ /// <para><b>For Beginners:</b> Encoding converts code text into numbers the AI can understand.
+ ///
+ /// Computers can't directly work with text, so we convert code into numerical form.
+ /// This encoding captures the meaning of the code, not just the characters.
+ /// Like translating emotions into emoji - different form, same meaning.
+ /// </para>
+ /// </remarks>
+ Tensor EncodeCode(string code);
+
+ /// <summary>
+ /// Decodes a vector representation back into source code.
+ /// </summary>
+ /// <param name="encoding">The encoded representation to decode.</param>
+ /// <returns>The decoded source code as a string.</returns>
+ /// <remarks>
+ /// <para>
+ /// Decoding transforms the model's internal numerical representation back into
+ /// human-readable source code.
+ /// </para>
+ /// <para><b>For Beginners:</b> Decoding converts the AI's numerical format back to readable code.
+ ///
+ /// After the AI processes code in numerical form, we need to convert it back to
+ /// text that humans can read and computers can execute. This is the reverse of encoding.
+ /// </para>
+ /// </remarks>
+ string DecodeCode(Tensor encoding);
+
+ /// <summary>
+ /// Performs a code-related task on the input code.
+ /// </summary>
+ /// <param name="code">The source code to process.</param>
+ /// <param name="task">The type of task to perform.</param>
+ /// <returns>The result of the task as a string.</returns>
+ /// <remarks>
+ /// <para>
+ /// This method allows the model to perform various code-related tasks such as
+ /// completion, summarization, bug detection, etc. based on the specified task type.
+ /// </para>
+ /// <para><b>For Beginners:</b> This method lets you tell the model what to do with the code.
+ ///
+ /// You provide code and specify what you want done with it:
+ /// - Complete it
+ /// - Summarize it
+ /// - Find bugs
+ /// - Generate documentation
+ ///
+ /// The model then performs that specific task and returns the result.
+ /// </para>
+ /// </remarks>
+ string PerformTask(string code, CodeTask task);
+
+ /// <summary>
+ /// Gets embeddings for code tokens.
+ /// </summary>
+ /// <param name="code">The source code to get embeddings for.</param>
+ /// <returns>A tensor containing token embeddings.</returns>
+ /// <remarks>
+ /// <para>
+ /// Embeddings are dense vector representations of code tokens that capture semantic
+ /// similarities. Similar code constructs have similar embeddings.
+ /// </para>
+ /// <para><b>For Beginners:</b> Embeddings represent each piece of code as a point in space.
+ ///
+ /// Code with similar meaning is placed close together in this space. For example,
+ /// "for loop" and "while loop" would be near each other because they're both loops,
+ /// but far from "function definition" because that's a different concept.
+ /// </para>
+ /// </remarks>
+ Tensor GetEmbeddings(string code);
+}
diff --git a/src/ProgramSynthesis/Interfaces/IProgramSynthesizer.cs b/src/ProgramSynthesis/Interfaces/IProgramSynthesizer.cs
new file mode 100644
index 000000000..840448544
--- /dev/null
+++ b/src/ProgramSynthesis/Interfaces/IProgramSynthesizer.cs
@@ -0,0 +1,163 @@
+using AiDotNet.Interfaces;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Models;
+
+namespace AiDotNet.ProgramSynthesis.Interfaces;
+
+/// <summary>
+/// Represents a program synthesis engine capable of automatically generating programs.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// IProgramSynthesizer defines the interface for models that can automatically generate
+/// programs from specifications, examples, or natural language descriptions. This is a
+/// key component of automated programming and AI-assisted development.
+/// </para>
+/// <para><b>For Beginners:</b> A program synthesizer is like an AI programmer.
+///
+/// Imagine describing what you want a program to do, and an AI writes the code for you.
+/// That's what a program synthesizer does. You provide:
+/// - Examples of inputs and desired outputs
+/// - A description in plain English
+/// - Or formal specifications
+///
+/// And the synthesizer creates a working program that meets your requirements.
+/// This is like having an AI assistant that can code for you!
+/// </para>
+/// </remarks>
+public interface IProgramSynthesizer : IFullModel, Program>
+{
+ /// <summary>
+ /// Gets the type of synthesis approach used by this synthesizer.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Different synthesis approaches have different strengths. Neural methods are
+ /// creative, symbolic methods are precise, and hybrid methods combine both.
+ /// </para>
+ /// <para><b>For Beginners:</b> This tells you how the AI generates programs.
+ ///
+ /// Different approaches are like different problem-solving strategies:
+ /// - Neural: Learns from examples (like learning by watching)
+ /// - Symbolic: Uses logic and rules (like following instructions)
+ /// - Genetic: Evolves solutions (like natural selection)
+ /// </para>
+ /// </remarks>
+ SynthesisType SynthesisType { get; }
+
+ /// <summary>
+ /// Gets the target programming language for synthesis.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Specifies which programming language the synthesized programs will be written in.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is the language the AI will write code in.
+ ///
+ /// Just like you choose whether to write in English or Spanish, this specifies
+ /// which programming language the generated code will use (Python, Java, etc.).
+ /// </para>
+ /// </remarks>
+ ProgramLanguage TargetLanguage { get; }
+
+ /// <summary>
+ /// Gets the maximum allowed length for synthesized programs.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// This limits the complexity and size of generated programs, measured in tokens
+ /// or abstract syntax tree nodes.
+ /// </para>
+ /// <para><b>For Beginners:</b> This limits how long/complex the generated code can be.
+ ///
+ /// Like a word limit on an essay, this prevents the AI from generating programs
+ /// that are too large or complex. Helps ensure the code stays manageable.
+ /// </para>
+ /// </remarks>
+ int MaxProgramLength { get; }
+
+ /// <summary>
+ /// Synthesizes a program from the given input specification.
+ /// </summary>
+ /// <param name="input">The input specification containing requirements or examples.</param>
+ /// <returns>A synthesized program that meets the specification.</returns>
+ /// <remarks>
+ /// <para>
+ /// This is the core synthesis method that generates a complete program from the
+ /// provided input specification. The input can contain examples, natural language
+ /// descriptions, or formal specifications.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is where the magic happens - it creates a program for you!
+ ///
+ /// You provide what you want (examples, description, etc.), and this method
+ /// generates actual working code that does what you asked for. Like asking
+ /// an AI chef for a recipe and getting step-by-step cooking instructions.
+ /// </para>
+ /// </remarks>
+ Program SynthesizeProgram(ProgramInput input);
+
+ /// <summary>
+ /// Validates whether a synthesized program is correct and well-formed.
+ /// </summary>
+ /// <param name="program">The program to validate.</param>
+ /// <returns>True if the program is valid, false otherwise.</returns>
+ /// <remarks>
+ /// <para>
+ /// Validation checks syntactic correctness, semantic validity, and whether
+ /// the program compiles or can be executed.
+ /// </para>
+ /// <para><b>For Beginners:</b> This checks if the generated code is valid and will work.
+ ///
+ /// Before using generated code, we need to check:
+ /// - Is the syntax correct? (no typos or grammar errors)
+ /// - Does it make sense? (logical consistency)
+ /// - Will it compile/run? (can the computer execute it)
+ ///
+ /// Like proofreading before submitting an essay.
+ /// </para>
+ /// </remarks>
+ bool ValidateProgram(Program program);
+
+ /// <summary>
+ /// Evaluates how well a program satisfies the input specification.
+ /// </summary>
+ /// <param name="program">The program to evaluate.</param>
+ /// <param name="testCases">Test cases to evaluate the program against.</param>
+ /// <returns>A fitness score indicating how well the program meets requirements (0-1, higher is better).</returns>
+ /// <remarks>
+ /// <para>
+ /// Evaluation tests the program against provided test cases and returns a score
+ /// indicating how well it performs. This is crucial for iterative refinement.
+ /// </para>
+ /// <para><b>For Beginners:</b> This grades how well the generated program works.
+ ///
+ /// Just like a teacher grades homework, this checks how well the program solves
+ /// the problem. It runs tests and gives a score (like a percentage):
+ /// - 1.0 = Perfect, passes all tests
+ /// - 0.5 = Passes half the tests
+ /// - 0.0 = Doesn't work at all
+ /// </para>
+ /// </remarks>
+ double EvaluateProgram(Program program, ProgramInput testCases);
+
+ /// <summary>
+ /// Refines an existing program to better meet the specification.
+ /// </summary>
+ /// <param name="program">The program to refine.</param>
+ /// <param name="feedback">Feedback or test cases that failed.</param>
+ /// <returns>A refined version of the program.</returns>
+ /// <remarks>
+ /// <para>
+ /// Refinement takes an existing program and improves it based on feedback,
+ /// such as failed test cases or user corrections. This enables iterative improvement.
+ /// </para>
+ /// <para><b>For Beginners:</b> This improves a program based on feedback.
+ ///
+ /// If the first version isn't quite right, this method improves it. Like editing
+ /// a draft based on reviewer comments - it takes the feedback and creates a
+ /// better version. Keeps the good parts and fixes the problems.
+ /// </para>
+ /// </remarks>
+ Program RefineProgram(Program program, ProgramInput feedback);
+}
diff --git a/src/ProgramSynthesis/Models/CodeSynthesisArchitecture.cs b/src/ProgramSynthesis/Models/CodeSynthesisArchitecture.cs
new file mode 100644
index 000000000..bfbfcdcf1
--- /dev/null
+++ b/src/ProgramSynthesis/Models/CodeSynthesisArchitecture.cs
@@ -0,0 +1,389 @@
+using AiDotNet.Enums;
+using AiDotNet.NeuralNetworks;
+using AiDotNet.NeuralNetworks.Layers;
+using AiDotNet.ProgramSynthesis.Enums;
+
+namespace AiDotNet.ProgramSynthesis.Models;
+
+/// <summary>
+/// Defines the architecture configuration for code synthesis and understanding models.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// CodeSynthesisArchitecture extends the neural network architecture with code-specific
+/// parameters such as programming language, maximum code length, vocabulary size, and
+/// synthesis strategy. It serves as a blueprint for building code models like CodeBERT,
+/// GraphCodeBERT, and CodeT5.
+/// </para>
+/// <para><b>For Beginners:</b> This is a blueprint for building AI models that understand code.
+///
+/// Just like TransformerArchitecture defines how to build a general transformer,
+/// CodeSynthesisArchitecture defines how to build models specifically for:
+/// - Understanding code
+/// - Generating code
+/// - Translating between programming languages
+/// - Finding bugs
+/// - Completing code
+///
+/// It includes all the settings needed to build these specialized code models,
+/// like which programming language to work with and how much code it can handle.
+/// </para>
+/// </remarks>
+public class CodeSynthesisArchitecture : NeuralNetworkArchitecture
+{
+ /// <summary>
+ /// Gets the type of synthesis approach to use.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Specifies whether to use neural, symbolic, hybrid, or genetic programming
+ /// approaches for code synthesis.
+ /// </para>
+ /// <para><b>For Beginners:</b> This chooses the strategy for generating code.
+ ///
+ /// Different approaches work better for different problems:
+ /// - Neural: Good for learning from examples
+ /// - Symbolic: Good for following rules
+ /// - Hybrid: Combines both approaches
+ /// - GeneticProgramming: Good for optimization problems
+ /// </para>
+ /// </remarks>
+ public SynthesisType SynthesisType { get; }
+
+ /// <summary>
+ /// Gets the target programming language.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Specifies which programming language the model is designed to work with.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is which programming language the model knows.
+ ///
+ /// Like a translator specializing in French or Spanish, code models often
+ /// specialize in specific languages like Python or Java.
+ /// </para>
+ /// </remarks>
+ public ProgramLanguage TargetLanguage { get; }
+
+ /// <summary>
+ /// Gets the number of encoder layers.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The number of transformer encoder layers used to process and understand code.
+ /// More layers allow for deeper understanding but require more computation.
+ /// </para>
+ /// <para><b>For Beginners:</b> This controls how deeply the model analyzes code.
+ ///
+ /// More encoder layers mean:
+ /// - Better understanding of complex code patterns
+ /// - Can capture more subtle relationships
+ /// - Takes more time and memory to process
+ ///
+ /// Typical values: 6-12 layers for code models.
+ /// </para>
+ /// </remarks>
+ public int NumEncoderLayers { get; }
+
+ /// <summary>
+ /// Gets the number of decoder layers (for generation tasks).
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The number of transformer decoder layers used to generate code.
+ /// Only relevant for encoder-decoder models like CodeT5.
+ /// </para>
+ /// <para><b>For Beginners:</b> This controls how the model generates code.
+ ///
+ /// Decoder layers are used when the model needs to create new code:
+ /// - For code completion
+ /// - For code translation
+ /// - For code generation from descriptions
+ ///
+ /// Not all models need decoders - some only understand code (encoders only).
+ /// </para>
+ /// </remarks>
+ public int NumDecoderLayers { get; }
+
+ /// <summary>
+ /// Gets the number of attention heads.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The number of parallel attention mechanisms in each layer. More heads
+ /// allow the model to focus on different aspects of code simultaneously.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is how many different things the model looks at simultaneously.
+ ///
+ /// Multiple attention heads let the model focus on:
+ /// - Variable definitions
+ /// - Function calls
+ /// - Control flow
+ /// - Data dependencies
+ /// All at the same time!
+ ///
+ /// Typical values: 8-16 heads.
+ /// </para>
+ /// </remarks>
+ public int NumHeads { get; }
+
+ /// <summary>
+ /// Gets the model dimension (embedding size).
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The size of the vector used to represent each token in the code.
+ /// Larger dimensions can capture more information but require more memory.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is how much information each code piece holds.
+ ///
+ /// Each word/token in code is represented by a vector of numbers.
+ /// This dimension controls the size of that vector:
+ /// - Larger: Can capture more nuanced meaning
+ /// - Smaller: Faster but less detailed
+ ///
+ /// Typical values: 256-768 for code models.
+ /// </para>
+ /// </remarks>
+ public int ModelDimension { get; }
+
+ /// <summary>
+ /// Gets the feed-forward network dimension.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The size of the intermediate layer in the feed-forward networks within
+ /// each transformer layer. Usually 2-4 times the model dimension.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is the processing power in each layer.
+ ///
+ /// After attention, each layer has a feed-forward network that processes
+ /// the information. This dimension controls its size:
+ /// - Larger: More processing power
+ /// - Smaller: Faster but less capable
+ ///
+ /// Typical: 4 × ModelDimension (e.g., if ModelDim is 512, this would be 2048).
+ /// </para>
+ /// </remarks>
+ public int FeedForwardDimension { get; }
+
+ /// <summary>
+ /// Gets the maximum sequence length (in tokens).
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The maximum number of code tokens the model can process at once.
+ /// Longer sequences capture more context but require more memory and computation.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is the maximum length of code the model can handle.
+ ///
+ /// Code is broken into tokens (like words). Rough guide, assuming about 10 tokens per line (varies by tokenizer and style):
+ /// - 512 tokens: roughly 40-60 lines of code
+ /// - 1024 tokens: roughly 80-120 lines of code
+ /// - 2048 tokens: roughly 160-250 lines of code
+ ///
+ /// Longer files need to be split into chunks.
+ /// </para>
+ /// </remarks>
+ public int MaxSequenceLength { get; }
+
+ /// <summary>
+ /// Gets the vocabulary size.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The number of unique tokens (keywords, operators, identifiers, etc.) in
+ /// the model's vocabulary. Larger vocabularies can represent more code patterns.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is the model's dictionary size for code.
+ ///
+ /// How many different code tokens the model knows:
+ /// - Keywords: if, for, while, class, etc.
+ /// - Operators: +, -, ==, etc.
+ /// - Common identifiers and patterns
+ ///
+ /// Typical values: 30,000-50,000 tokens for code models.
+ /// </para>
+ /// </remarks>
+ public int VocabularySize { get; }
+
+ /// <summary>
+ /// Gets the dropout rate for regularization.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// The probability of dropping neurons during training to prevent overfitting.
+ /// </para>
+ /// <para><b>For Beginners:</b> This helps prevent the model from memorizing too much.
+ ///
+ /// Dropout randomly disables some neurons during training, which:
+ /// - Prevents overfitting (memorizing training data)
+ /// - Makes the model more robust
+ /// - Improves generalization to new code
+ ///
+ /// Typical value: 0.1 (10% of neurons randomly disabled during training).
+ /// </para>
+ /// </remarks>
+ public double DropoutRate { get; }
+
+ /// <summary>
+ /// Gets the maximum allowed program length for synthesis.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Limits the size of programs that can be synthesized, measured in
+ /// abstract syntax tree nodes or lines of code.
+ /// </para>
+ /// <para><b>For Beginners:</b> This limits how long generated programs can be.
+ ///
+ /// Prevents the AI from creating huge, unwieldy programs. Like a word limit
+ /// on an essay - keeps the output manageable and focused.
+ /// </para>
+ /// </remarks>
+ public int MaxProgramLength { get; }
+
+ /// <summary>
+ /// Gets whether to use positional encoding.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Determines if positional information should be added to token embeddings
+ /// to help the model understand code order and structure.
+ /// </para>
+ /// <para><b>For Beginners:</b> This helps the model understand code order.
+ ///
+ /// Without this, the model wouldn't know if "a = b" comes before or after "b = 5".
+ /// Positional encoding adds location information so the model understands:
+ /// - Which line comes first
+ /// - How far apart two statements are
+ /// - The sequential structure of code
+ ///
+ /// Usually set to true for code models.
+ /// </para>
+ /// </remarks>
+ public bool UsePositionalEncoding { get; }
+
+ /// <summary>
+ /// Gets whether to use data flow information (for GraphCodeBERT-style models).
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// If true, the model will use graph-based representations that capture
+ /// data flow between variables and functions, not just sequential structure.
+ /// </para>
+ /// <para><b>For Beginners:</b> This makes the model understand how data flows through code.
+ ///
+ /// Beyond just reading code line by line, this tracks:
+ /// - Which variables depend on which others
+ /// - How data flows from one function to another
+ /// - The relationships between different parts of code
+ ///
+ /// Like understanding not just the words in a recipe, but how ingredients
+ /// flow from one step to the next. Used in GraphCodeBERT models.
+ /// </para>
+ /// </remarks>
+ public bool UseDataFlow { get; }
+
+ /// <summary>
+ /// Gets the code task type this architecture is optimized for.
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Specifies the primary task this model will perform, which affects the
+ /// model structure and training approach.
+ /// </para>
+ /// <para><b>For Beginners:</b> This is the main job the model will do.
+ ///
+ /// Code models can do many things:
+ /// - Complete code as you type
+ /// - Find bugs
+ /// - Translate between languages
+ /// - Generate documentation
+ ///
+ /// This setting optimizes the model for one specific task.
+ /// </para>
+ /// </remarks>
+ public CodeTask CodeTaskType { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <c>CodeSynthesisArchitecture</c> class.
+ /// </summary>
+ /// <param name="synthesisType">The type of synthesis approach.</param>
+ /// <param name="targetLanguage">The target programming language.</param>
+ /// <param name="codeTaskType">The primary code task type.</param>
+ /// <param name="numEncoderLayers">Number of encoder layers (default 6).</param>
+ /// <param name="numDecoderLayers">Number of decoder layers (default 0).</param>
+ /// <param name="numHeads">Number of attention heads (default 8).</param>
+ /// <param name="modelDimension">Size of token embeddings (default 512).</param>
+ /// <param name="feedForwardDimension">Size of feed-forward layers (default 2048).</param>
+ /// <param name="maxSequenceLength">Maximum input sequence length (default 512).</param>
+ /// <param name="vocabularySize">Size of the code vocabulary (default 50000).</param>
+ /// <param name="maxProgramLength">Maximum length of synthesized programs (default 100).</param>
+ /// <param name="dropoutRate">Dropout rate for regularization (default 0.1).</param>
+ /// <param name="usePositionalEncoding">Whether to use positional encoding (default true).</param>
+ /// <param name="useDataFlow">Whether to use data flow analysis (default false).</param>
+ /// <param name="complexity">Overall network complexity, passed to the base class.</param>
+ /// <param name="inputSize">Input size passed to the base class; a value of 0 or less falls back to <paramref name="vocabularySize"/>.</param>
+ /// <param name="outputSize">Output size passed to the base class; a value of 0 or less falls back to <paramref name="vocabularySize"/>.</param>
+ /// <param name="layers">Optional custom layers, passed to the base class.</param>
+ /// <remarks>
+ /// <para>
+ /// Creates a new code synthesis architecture with the specified parameters; this configuration is used to build code models.
+ /// NOTE(review): this constructor body stores the values as given and performs no range checks (e.g. dropoutRate, numHeads).
+ /// </para>
+ /// <para><b>For Beginners:</b> This constructor sets up all the parameters for a code model.
+ ///
+ /// When creating a code model, you specify:
+ /// - What approach to use (neural, symbolic, etc.)
+ /// - Which language to work with
+ /// - What task to perform
+ /// - How big and powerful the model should be
+ ///
+ /// Many parameters have sensible defaults, so you only need to set the ones
+ /// that matter for your specific use case.
+ /// </para>
+ /// </remarks>
+ public CodeSynthesisArchitecture(
+ SynthesisType synthesisType,
+ ProgramLanguage targetLanguage,
+ CodeTask codeTaskType,
+ int numEncoderLayers = 6,
+ int numDecoderLayers = 0,
+ int numHeads = 8,
+ int modelDimension = 512,
+ int feedForwardDimension = 2048,
+ int maxSequenceLength = 512,
+ int vocabularySize = 50000,
+ int maxProgramLength = 100,
+ double dropoutRate = 0.1,
+ bool usePositionalEncoding = true,
+ bool useDataFlow = false,
+ NetworkComplexity complexity = NetworkComplexity.Medium,
+ int inputSize = 0,
+ int outputSize = 0,
+ List>? layers = null)
+ : base(
+ inputType: InputType.OneDimensional,
+ taskType: NeuralNetworkTaskType.SequenceToSequence,
+ complexity: complexity,
+ inputSize: inputSize > 0 ? inputSize : vocabularySize,
+ outputSize: outputSize > 0 ? outputSize : vocabularySize,
+ layers: layers)
+ {
+ SynthesisType = synthesisType;
+ TargetLanguage = targetLanguage;
+ CodeTaskType = codeTaskType;
+ NumEncoderLayers = numEncoderLayers;
+ NumDecoderLayers = numDecoderLayers;
+ NumHeads = numHeads;
+ ModelDimension = modelDimension;
+ FeedForwardDimension = feedForwardDimension;
+ MaxSequenceLength = maxSequenceLength;
+ VocabularySize = vocabularySize;
+ MaxProgramLength = maxProgramLength;
+ DropoutRate = dropoutRate;
+ UsePositionalEncoding = usePositionalEncoding;
+ UseDataFlow = useDataFlow;
+ }
+}
diff --git a/src/ProgramSynthesis/Models/Program.cs b/src/ProgramSynthesis/Models/Program.cs
new file mode 100644
index 000000000..775b01955
--- /dev/null
+++ b/src/ProgramSynthesis/Models/Program.cs
@@ -0,0 +1,266 @@
+using AiDotNet.LinearAlgebra;
+using AiDotNet.ProgramSynthesis.Enums;
+
+namespace AiDotNet.ProgramSynthesis.Models;
+
+/// <summary>
+/// Represents a synthesized program with its source code and metadata.
+/// </summary>
+/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
+/// <remarks>
+/// <para>
+/// The Program class encapsulates a synthesized or analyzed program, including its
+/// source code, the programming language it's written in, validation status, and
+/// optional execution metrics.
+/// </para>
+/// <para><b>For Beginners:</b> This class represents a computer program created by AI.
+///
+/// Think of this as a container that holds:
+/// - The actual code (like a recipe holds instructions)
+/// - What language it's written in (Python, Java, etc.)
+/// - Whether the code is valid and will run
+/// - How well it performs
+/// - An optional numerical representation that AI can work with
+///
+/// Just like a recipe card has the recipe, cooking time, and difficulty level,
+/// this class holds a program and all its important information.
+/// </para>
+/// </remarks>
+public class Program
+{
+    /// <summary>
+    /// Gets or sets the source code of the program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// The actual program text in the target programming language. This is the
+    /// human-readable code that can be executed or compiled.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is the actual code - the instructions the computer will follow.
+    ///
+    /// Just like a recipe has step-by-step cooking instructions, this contains
+    /// the step-by-step commands that tell the computer what to do.
+    /// </para>
+    /// </remarks>
+    public string SourceCode { get; set; }
+
+    /// <summary>
+    /// Gets or sets the programming language of the program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Specifies which programming language the source code is written in.
+    /// This affects how the code should be interpreted, compiled, or executed.
+    /// </para>
+    /// <para><b>For Beginners:</b> This tells you which programming language was used.
+    ///
+    /// Just like knowing whether a recipe is in English or French, this tells you
+    /// whether the code is in Python, Java, C#, etc. Different languages have
+    /// different rules and syntax.
+    /// </para>
+    /// </remarks>
+    public ProgramLanguage Language { get; set; }
+
+    /// <summary>
+    /// Gets or sets a value indicating whether the program is syntactically and semantically valid.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Indicates whether the program passes validation checks, including syntax
+    /// correctness and semantic validity. A valid program can potentially be executed.
+    /// </para>
+    /// <para><b>For Beginners:</b> This tells you if the code is correct and will run.
+    ///
+    /// Like checking a recipe for mistakes before cooking:
+    /// - Are all ingredients listed? (syntax)
+    /// - Do the instructions make sense? (semantics)
+    /// - Will following this recipe actually work? (validity)
+    ///
+    /// If IsValid is true, the code should run without errors.
+    /// </para>
+    /// </remarks>
+    public bool IsValid { get; set; }
+
+    /// <summary>
+    /// Gets or sets the fitness score of the program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// A value between 0 and 1 indicating how well the program satisfies the
+    /// synthesis requirements. Higher values indicate better performance.
+    /// 1.0 means perfect, 0.0 means complete failure.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is like a grade showing how well the program works.
+    ///
+    /// Think of it as a score from 0% to 100%:
+    /// - 1.0 (100%): Perfect! Passes all tests
+    /// - 0.75 (75%): Pretty good, passes most tests
+    /// - 0.5 (50%): Mediocre, passes half the tests
+    /// - 0.0 (0%): Doesn't work at all
+    ///
+    /// Higher scores mean the program better solves the problem you gave it.
+    /// </para>
+    /// </remarks>
+    public double FitnessScore { get; set; }
+
+    /// <summary>
+    /// Gets or sets the complexity measure of the program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// A metric indicating the complexity of the program, which could be based on
+    /// various factors like number of statements, cyclomatic complexity, or
+    /// abstract syntax tree size.
+    /// </para>
+    /// <para><b>For Beginners:</b> This measures how complicated the program is.
+    ///
+    /// Just like recipes can be simple (toast) or complex (soufflé), programs
+    /// have different complexity levels. This number tells you:
+    /// - Low values: Simple, short programs that are easy to understand
+    /// - High values: Complex, longer programs with many steps
+    ///
+    /// Usually, simpler programs (lower complexity) are better when they
+    /// solve the same problem.
+    /// </para>
+    /// </remarks>
+    public int Complexity { get; set; }
+
+    /// <summary>
+    /// Gets or sets the encoded representation of the program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// An optional numerical encoding of the program that can be used by neural
+    /// networks for further processing or refinement.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is a numerical version of the code for AI to work with.
+    ///
+    /// Computers and AI work better with numbers than text. This is the program
+    /// converted into a numerical form that AI can easily process, like converting
+    /// a photo into pixels. The original code is still in SourceCode - this is
+    /// just an alternative representation for computation.
+    /// </para>
+    /// </remarks>
+    public Tensor? Encoding { get; set; }
+
+    /// <summary>
+    /// Gets or sets any error messages from compilation or execution attempts.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// If the program failed validation or execution, this contains the error
+    /// messages explaining what went wrong.
+    /// </para>
+    /// <para><b>For Beginners:</b> This explains what's wrong if the program doesn't work.
+    ///
+    /// When code has problems, we need to know why. This stores error messages like:
+    /// - "Syntax error on line 5: missing semicolon"
+    /// - "Variable 'x' not defined"
+    ///
+    /// These help debug and fix the program, like having someone point out
+    /// exactly what's wrong with a recipe.
+    /// </para>
+    /// </remarks>
+    public string? ErrorMessage { get; set; }
+
+    /// <summary>
+    /// Gets or sets execution time in milliseconds if the program was executed.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Records how long the program took to execute, which can be useful for
+    /// performance comparison and optimization.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is how long the program takes to run.
+    ///
+    /// Measured in milliseconds (1000 milliseconds = 1 second). Helps answer:
+    /// - Is this program fast or slow?
+    /// - Which of two programs is faster?
+    ///
+    /// Lower execution time is usually better - it means the program finishes faster.
+    /// </para>
+    /// </remarks>
+    public double? ExecutionTimeMs { get; set; }
+
+    /// <summary>
+    /// Initializes a new instance of the <c>Program</c> class.
+    /// </summary>
+    /// <param name="sourceCode">The source code of the program. Must not be null.</param>
+    /// <param name="language">The programming language.</param>
+    /// <param name="isValid">Whether the program is valid (default is false).</param>
+    /// <param name="fitnessScore">The fitness score (default is 0.0).</param>
+    /// <param name="complexity">The complexity measure (default is 0).</param>
+    /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="sourceCode"/> is null.</exception>
+    /// <remarks>
+    /// <para>
+    /// Creates a new Program instance with the specified source code and metadata.
+    /// This constructor is typically used when creating a synthesized program.
+    /// </para>
+    /// <para><b>For Beginners:</b> This creates a new program object.
+    ///
+    /// When the AI generates or processes code, it creates a Program object
+    /// to store all the information. You need to provide:
+    /// - The actual code (required)
+    /// - What language it's in (required)
+    /// - Optional: whether it's valid, the fitness score, and the complexity
+    ///
+    /// Think of it like filling out a form with all the program's details.
+    /// </para>
+    /// </remarks>
+    public Program(
+        string sourceCode,
+        ProgramLanguage language,
+        bool isValid = false,
+        double fitnessScore = 0.0,
+        int complexity = 0)
+    {
+        SourceCode = sourceCode ?? throw new System.ArgumentNullException(nameof(sourceCode));
+        Language = language;
+        IsValid = isValid;
+        FitnessScore = fitnessScore;
+        Complexity = complexity;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <c>Program</c> class with default values.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Creates an empty Program instance (empty source code, generic language, not valid,
+    /// zero fitness and complexity). Useful when the program will be populated later or when deserializing.
+    /// </para>
+    /// <para><b>For Beginners:</b> This creates an empty program placeholder.
+    ///
+    /// Sometimes you need to create a Program object before you have all the
+    /// information. This creates an empty one that you can fill in later,
+    /// like having a blank form to fill out gradually.
+    /// </para>
+    /// </remarks>
+    public Program()
+    {
+        SourceCode = string.Empty;
+        Language = ProgramLanguage.Generic;
+        IsValid = false;
+        FitnessScore = 0.0;
+        Complexity = 0;
+    }
+
+    /// <summary>
+    /// Returns a summary line (language, validity, fitness, complexity) followed by the source code.
+    /// </summary>
+    /// <returns>A header line with the program's metadata, a newline, and then the source code.</returns>
+    /// <remarks>
+    /// <para>
+    /// Numbers are formatted with the invariant culture so the text is the same on every machine (e.g. "0.85", never "0,85").
+    /// </para>
+    /// <para><b>For Beginners:</b> This converts the program to a readable string.
+    ///
+    /// When you need to display or print the program, this method returns a short
+    /// summary line followed by the source code. Useful for debugging and logging.
+    /// </para>
+    /// </remarks>
+    public override string ToString()
+    {
+        return System.FormattableString.Invariant($"[{Language}] Valid: {IsValid}, Fitness: {FitnessScore:F2}, Complexity: {Complexity}\n{SourceCode}");
+    }
+}
diff --git a/src/ProgramSynthesis/Models/ProgramInput.cs b/src/ProgramSynthesis/Models/ProgramInput.cs
new file mode 100644
index 000000000..f10c08a75
--- /dev/null
+++ b/src/ProgramSynthesis/Models/ProgramInput.cs
@@ -0,0 +1,341 @@
+using AiDotNet.LinearAlgebra;
+using AiDotNet.ProgramSynthesis.Enums;
+
+namespace AiDotNet.ProgramSynthesis.Models;
+
+/// <summary>
+/// Represents the input specification for program synthesis.
+/// </summary>
+/// <!-- NOTE(review): this line held a type-parameter style description ("The numeric type used for calculations (e.g., double, float).") although no type parameter is declared on the class, and Encoding is typed as a bare Tensor; confirm whether generic arguments are missing. -->
+/// <remarks>
+/// <para>
+/// ProgramInput bundles everything a synthesizer is given about the desired program:
+/// a natural language description, input-output examples, a formal specification,
+/// constraints, limits, test cases, an optional encoding and tags.
+/// </para>
+/// <para><b>For Beginners:</b> This class describes what you want the program to do.
+///
+/// When you want AI to create a program for you, you need to tell it what you want.
+/// This class lets you provide that information in different ways:
+/// - Describe it in plain English
+/// - Give examples of inputs and expected outputs
+/// - Specify constraints (like "must run in under 1 second")
+///
+/// Think of it like ordering at a restaurant - you tell the chef what you want,
+/// and they create the dish. This is how you tell the AI what program you want.
+/// </para>
+/// </remarks>
+public class ProgramInput
+{
+    /// <summary>
+    /// Gets or sets the natural language description of the desired program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// A plain-English description of what the program should do. This can be
+    /// used by neural synthesis methods to understand the user's intent.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is where you describe what you want in plain English.
+    ///
+    /// Just like telling someone:
+    /// "I need a function that takes a list of numbers and returns the average"
+    ///
+    /// No programming knowledge needed - just explain what you want the program
+    /// to accomplish.
+    /// </para>
+    /// </remarks>
+    public string? Description { get; set; }
+
+    /// <summary>
+    /// Gets or sets the target programming language for synthesis.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Specifies which programming language the synthesized program should be written in.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is which programming language you want the code in.
+    ///
+    /// Like choosing whether you want instructions in English or Spanish, this
+    /// tells the AI whether to generate code in Python, Java, C#, etc.
+    /// </para>
+    /// </remarks>
+    public ProgramLanguage TargetLanguage { get; set; }
+
+    /// <summary>
+    /// Gets or sets the input-output examples for inductive synthesis.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// A list of example inputs and their expected outputs. The synthesizer learns
+    /// from these examples to generate a program that generalizes to new inputs.
+    /// Each tuple contains (Input, ExpectedOutput). The list is null until assigned or until AddExample is called.
+    /// </para>
+    /// <para><b>For Beginners:</b> These are examples showing what the program should do.
+    ///
+    /// Instead of explaining, you can show examples:
+    /// - Input: [1, 2, 3] → Output: 6 (sum)
+    /// - Input: [4, 5] → Output: 9 (sum)
+    /// - Input: [10] → Output: 10 (sum)
+    ///
+    /// The AI figures out the pattern from your examples. Like teaching by example
+    /// rather than explaining - show what you want, and the AI learns the rule.
+    /// </para>
+    /// </remarks>
+    public List<(string Input, string ExpectedOutput)>? Examples { get; set; }
+
+    /// <summary>
+    /// Gets or sets the formal specification in logic or a domain-specific language.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// A formal, mathematical specification of the program's behavior. This is used
+    /// by deductive synthesis methods to construct provably correct programs.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is a precise mathematical description (advanced).
+    ///
+    /// This is more advanced - it's a very precise, formal way to describe what
+    /// the program should do using mathematical logic. Like a detailed blueprint
+    /// with exact specifications. Most users will use Description or Examples instead.
+    /// </para>
+    /// </remarks>
+    public string? FormalSpecification { get; set; }
+
+    /// <summary>
+    /// Gets or sets constraints that the synthesized program must satisfy.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// A list of free-text constraints or requirements for the program, such as:
+    /// - Performance requirements ("must run in O(n) time")
+    /// - Resource limits ("must use less than 1MB memory")
+    /// - Style requirements ("must use functional programming style")
+    /// </para>
+    /// <para><b>For Beginners:</b> These are rules the program must follow.
+    ///
+    /// Beyond just working correctly, you might have specific requirements:
+    /// - "Must be fast"
+    /// - "Should be easy to read"
+    /// - "Can't use certain functions"
+    ///
+    /// Like telling a chef: "Make it vegetarian and gluten-free."
+    /// These constraints ensure the program meets your specific needs.
+    /// </para>
+    /// </remarks>
+    public List<string>? Constraints { get; set; }
+
+    /// <summary>
+    /// Gets or sets the maximum allowed complexity for the synthesized program.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Limits how complex the generated program can be. This helps ensure the
+    /// synthesizer produces simple, understandable code when possible. Null means no limit was given.
+    /// </para>
+    /// <para><b>For Beginners:</b> This limits how complicated the program can be.
+    ///
+    /// Sometimes simple is better. This sets a maximum complexity level:
+    /// - Low value: Forces simple solutions
+    /// - High value: Allows complex solutions if needed
+    ///
+    /// Like asking for a simple recipe instead of a gourmet one - both might
+    /// work, but simple is often better for learning and maintaining.
+    /// </para>
+    /// </remarks>
+    public int? MaxComplexity { get; set; }
+
+    /// <summary>
+    /// Gets or sets the timeout for program synthesis in milliseconds.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Specifies how long the synthesizer should attempt to find a solution
+    /// before giving up. Prevents indefinite computation on difficult problems.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is how long the AI has to find a solution.
+    ///
+    /// Measured in milliseconds (1000ms = 1 second). Sometimes finding the perfect
+    /// program takes too long. This sets a time limit:
+    /// - 5000ms (5 seconds): Quick attempt, might not find best solution
+    /// - 60000ms (1 minute): More thorough search
+    ///
+    /// Like giving up on a crossword puzzle after 10 minutes - sometimes you
+    /// need to move on even if you haven't finished.
+    /// </para>
+    /// </remarks>
+    public int? TimeoutMs { get; set; }
+
+    /// <summary>
+    /// Gets or sets the test cases for program validation.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Additional test cases (beyond the examples) used to validate the correctness
+    /// of synthesized programs. Each tuple contains (Input, ExpectedOutput).
+    /// </para>
+    /// <para><b>For Beginners:</b> These are additional tests to verify the program works.
+    ///
+    /// While Examples teach the AI, TestCases verify the result:
+    /// - Examples: "Learn from these"
+    /// - TestCases: "Prove you got it right with these"
+    ///
+    /// Like the difference between practice problems and an exam - test cases
+    /// help ensure the program truly works correctly.
+    /// </para>
+    /// </remarks>
+    public List<(string Input, string ExpectedOutput)>? TestCases { get; set; }
+
+    /// <summary>
+    /// Gets or sets an encoded representation of the input for neural processing.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// An optional numerical encoding of the input specification that can be
+    /// directly processed by neural networks.
+    /// </para>
+    /// <para><b>For Beginners:</b> This is a numerical version for AI processing.
+    ///
+    /// Neural networks work with numbers, not text. This is an optional field
+    /// where the input can be pre-converted to numbers. Usually generated
+    /// automatically - you don't need to provide this yourself.
+    /// </para>
+    /// </remarks>
+    public Tensor? Encoding { get; set; }
+
+    /// <summary>
+    /// Gets or sets metadata tags for categorizing or filtering synthesis tasks.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Optional text tags that can be used to categorize the synthesis task, track
+    /// experiments, or provide additional context to the synthesizer.
+    /// </para>
+    /// <para><b>For Beginners:</b> These are labels for organizing synthesis tasks.
+    ///
+    /// Like hashtags or folders, these help organize and categorize:
+    /// - "sorting", "algorithm", "beginner"
+    /// - "web-scraping", "python", "advanced"
+    ///
+    /// Useful for tracking different types of synthesis tasks and experiments.
+    /// </para>
+    /// </remarks>
+    public List<string>? Tags { get; set; }
+
+    /// <summary>
+    /// Initializes a new instance of the ProgramInput class from the most commonly supplied values.
+    /// </summary>
+    /// <param name="description">The natural language description.</param>
+    /// <param name="targetLanguage">The target programming language.</param>
+    /// <param name="examples">Optional input-output examples.</param>
+    /// <param name="constraints">Optional constraints.</param>
+    /// <remarks>
+    /// <para>
+    /// Stores the four arguments in the matching properties without copying the lists.
+    /// Additional properties can be set after construction.
+    /// </para>
+    /// <para><b>For Beginners:</b> This creates a new specification for what program you want.
+    ///
+    /// Typically you provide:
+    /// - A description of what you want
+    /// - Which language to use
+    /// - Optionally: examples and constraints
+    ///
+    /// Like filling out an order form for a custom program.
+    /// </para>
+    /// </remarks>
+    public ProgramInput(
+        string? description = null,
+        ProgramLanguage targetLanguage = ProgramLanguage.Generic,
+        List<(string, string)>? examples = null,
+        List<string>? constraints = null)
+    {
+        Description = description;
+        TargetLanguage = targetLanguage;
+        Examples = examples;
+        Constraints = constraints;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the ProgramInput class with default values.
+    /// </summary>
+    /// <remarks>
+    /// <para>
+    /// Creates an empty ProgramInput whose TargetLanguage is Generic; no other property is assigned here.
+    /// </para>
+    /// <para><b>For Beginners:</b> Creates an empty specification to fill in later.
+    ///
+    /// Sometimes you want to create the object first and add details later.
+    /// This creates an empty form you can fill in step by step.
+    /// </para>
+    /// </remarks>
+    public ProgramInput()
+    {
+        TargetLanguage = ProgramLanguage.Generic;
+    }
+
+    /// <summary>
+    /// Adds an input-output example to the Examples list.
+    /// </summary>
+    /// <param name="input">The example input.</param>
+    /// <param name="expectedOutput">The expected output for this input.</param>
+    /// <remarks>
+    /// <para>
+    /// Convenience method to add examples one at a time instead of creating
+    /// the entire list upfront. The list is created on the first call if it is still null.
+    /// </para>
+    /// <para><b>For Beginners:</b> This adds one example at a time.
+    ///
+    /// Instead of providing all examples at once, you can add them one by one:
+    /// programInput.AddExample("[1,2,3]", "6");
+    /// programInput.AddExample("[4,5]", "9");
+    ///
+    /// Easier than creating the list yourself.
+    /// </para>
+    /// </remarks>
+    public void AddExample(string input, string expectedOutput)
+    {
+        Examples ??= new List<(string, string)>();
+        Examples.Add((input, expectedOutput));
+    }
+
+    /// <summary>
+    /// Adds a test case to the TestCases list.
+    /// </summary>
+    /// <param name="input">The test input.</param>
+    /// <param name="expectedOutput">The expected output for this input.</param>
+    /// <remarks>
+    /// <para>
+    /// Convenience method to add test cases one at a time; the list is created on the first call if it is still null.
+    /// </para>
+    /// <para><b>For Beginners:</b> This adds one test case at a time.
+    ///
+    /// Similar to AddExample, but for test cases that verify correctness:
+    /// programInput.AddTestCase("[10,20]", "30");
+    /// </para>
+    /// </remarks>
+    public void AddTestCase(string input, string expectedOutput)
+    {
+        TestCases ??= new List<(string, string)>();
+        TestCases.Add((input, expectedOutput));
+    }
+
+    /// <summary>
+    /// Adds a constraint to the Constraints list.
+    /// </summary>
+    /// <param name="constraint">The constraint to add.</param>
+    /// <remarks>
+    /// <para>
+    /// Convenience method to add constraints one at a time; the list is created on the first call if it is still null.
+    /// </para>
+    /// <para><b>For Beginners:</b> This adds one constraint at a time.
+    ///
+    /// Add requirements one by one:
+    /// programInput.AddConstraint("Must run in O(n) time");
+    /// programInput.AddConstraint("Should not use recursion");
+    /// </para>
+    /// </remarks>
+    public void AddConstraint(string constraint)
+    {
+        Constraints ??= new List<string>();
+        Constraints.Add(constraint);
+    }
+}
diff --git a/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/CodeSynthesisArchitectureTests.cs b/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/CodeSynthesisArchitectureTests.cs
new file mode 100644
index 000000000..5d1ed92a7
--- /dev/null
+++ b/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/CodeSynthesisArchitectureTests.cs
@@ -0,0 +1,103 @@
+using AiDotNet.Enums;
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Models;
+using Xunit;
+
+namespace AiDotNetTests.UnitTests.ProgramSynthesis;
+
+/// <summary>
+/// Verifies that CodeSynthesisArchitecture exposes its constructor arguments and defaults through its properties.
+/// </summary>
+public class CodeSynthesisArchitectureTests
+{
+    [Fact]
+    public void Constructor_ValidParameters_CreatesInstance()
+    {
+        // Build a configuration in which every size is spelled out explicitly.
+        var sut = new CodeSynthesisArchitecture(
+            synthesisType: SynthesisType.Neural,
+            targetLanguage: ProgramLanguage.Python,
+            codeTaskType: CodeTask.Generation,
+            numEncoderLayers: 6,
+            numDecoderLayers: 6,
+            numHeads: 8,
+            modelDimension: 512,
+            feedForwardDimension: 2048,
+            maxSequenceLength: 512,
+            vocabularySize: 50000,
+            maxProgramLength: 100);
+
+        // Every argument must come back unchanged from the matching property.
+        Assert.NotNull(sut);
+        Assert.Equal(SynthesisType.Neural, sut.SynthesisType);
+        Assert.Equal(ProgramLanguage.Python, sut.TargetLanguage);
+        Assert.Equal(CodeTask.Generation, sut.CodeTaskType);
+        Assert.Equal(6, sut.NumEncoderLayers);
+        Assert.Equal(6, sut.NumDecoderLayers);
+        Assert.Equal(8, sut.NumHeads);
+        Assert.Equal(512, sut.ModelDimension);
+        Assert.Equal(2048, sut.FeedForwardDimension);
+        Assert.Equal(512, sut.MaxSequenceLength);
+        Assert.Equal(50000, sut.VocabularySize);
+        Assert.Equal(100, sut.MaxProgramLength);
+    }
+
+    [Fact]
+    public void Constructor_DefaultValues_CreatesInstanceWithDefaults()
+    {
+        // Only the synthesis type, language and task are passed; the rest is left to the defaults.
+        var sut = new CodeSynthesisArchitecture(
+            synthesisType: SynthesisType.Hybrid,
+            targetLanguage: ProgramLanguage.CSharp,
+            codeTaskType: CodeTask.Completion);
+
+        // The values below are the defaults this test expects from the constructor.
+        Assert.NotNull(sut);
+        Assert.Equal(6, sut.NumEncoderLayers);
+        Assert.Equal(0, sut.NumDecoderLayers);
+        Assert.Equal(8, sut.NumHeads);
+        Assert.Equal(512, sut.ModelDimension);
+        Assert.Equal(0.1, sut.DropoutRate);
+        Assert.True(sut.UsePositionalEncoding);
+        Assert.False(sut.UseDataFlow);
+    }
+
+    [Fact]
+    public void Constructor_WithDataFlow_SetsDataFlowCorrectly()
+    {
+        // Opt in to data flow explicitly.
+        var sut = new CodeSynthesisArchitecture(
+            synthesisType: SynthesisType.Neural,
+            targetLanguage: ProgramLanguage.Java,
+            codeTaskType: CodeTask.BugDetection,
+            useDataFlow: true);
+
+        // The flag must be reported as enabled.
+        Assert.True(sut.UseDataFlow);
+    }
+
+    [Fact]
+    public void Constructor_DifferentLanguages_CreatesCorrectInstances()
+    {
+        // One architecture per language, each paired with a different task.
+        var forPython = new CodeSynthesisArchitecture(
+            synthesisType: SynthesisType.Neural,
+            targetLanguage: ProgramLanguage.Python,
+            codeTaskType: CodeTask.Generation);
+
+        var forJava = new CodeSynthesisArchitecture(
+            synthesisType: SynthesisType.Neural,
+            targetLanguage: ProgramLanguage.Java,
+            codeTaskType: CodeTask.Translation);
+
+        var forCSharp = new CodeSynthesisArchitecture(
+            synthesisType: SynthesisType.Neural,
+            targetLanguage: ProgramLanguage.CSharp,
+            codeTaskType: CodeTask.Refactoring);
+
+        // Each instance keeps the language it was constructed with.
+        Assert.Equal(ProgramLanguage.Python, forPython.TargetLanguage);
+        Assert.Equal(ProgramLanguage.Java, forJava.TargetLanguage);
+        Assert.Equal(ProgramLanguage.CSharp, forCSharp.TargetLanguage);
+    }
+}
diff --git a/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/ProgramInputTests.cs b/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/ProgramInputTests.cs
new file mode 100644
index 000000000..f06eea827
--- /dev/null
+++ b/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/ProgramInputTests.cs
@@ -0,0 +1,141 @@
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Models;
+using Xunit;
+
+namespace AiDotNetTests.UnitTests.ProgramSynthesis;
+
+/// <summary>
+/// Unit tests for the ProgramInput class: constructors, property round-trips and the Add* helper methods.
+/// </summary>
+public class ProgramInputTests
+{
+    [Fact]
+    public void Constructor_WithParameters_CreatesInstance()
+    {
+        // Arrange: a description, two examples and a single textual constraint.
+        const string description = "Create a function that sorts a list";
+        var examples = new List<(string, string)>
+        {
+            ("[3, 1, 2]", "[1, 2, 3]"),
+            ("[5, 4]", "[4, 5]")
+        };
+        var constraints = new List<string> { "Must use O(n log n) algorithm" };
+
+        // Act
+        var input = new ProgramInput(
+            description,
+            ProgramLanguage.Python,
+            examples,
+            constraints);
+
+        // Assert: every argument is reflected by the matching property.
+        Assert.NotNull(input);
+        Assert.Equal(description, input.Description);
+        Assert.Equal(ProgramLanguage.Python, input.TargetLanguage);
+        Assert.Equal(2, input.Examples?.Count);
+        Assert.Single(input.Constraints ?? new List<string>());
+    }
+
+    [Fact]
+    public void Constructor_DefaultConstructor_CreatesEmptyInstance()
+    {
+        // Act
+        var input = new ProgramInput();
+
+        // Assert: Generic language, and no description or examples yet.
+        Assert.NotNull(input);
+        Assert.Equal(ProgramLanguage.Generic, input.TargetLanguage);
+        Assert.Null(input.Description);
+        Assert.Null(input.Examples);
+    }
+
+    [Fact]
+    public void AddExample_AddsExampleCorrectly()
+    {
+        // Arrange: Examples starts out null on a fresh instance.
+        var input = new ProgramInput();
+
+        // Act
+        input.AddExample("[1, 2, 3]", "6");
+        input.AddExample("[4, 5]", "9");
+
+        // Assert: the list was created and holds both pairs in insertion order.
+        Assert.NotNull(input.Examples);
+        Assert.Equal(2, input.Examples.Count);
+        Assert.Equal(("[1, 2, 3]", "6"), input.Examples[0]);
+        Assert.Equal(("[4, 5]", "9"), input.Examples[1]);
+    }
+
+    [Fact]
+    public void AddTestCase_AddsTestCaseCorrectly()
+    {
+        // Arrange
+        var input = new ProgramInput();
+
+        // Act
+        input.AddTestCase("[10]", "10");
+        input.AddTestCase("[1, 1, 1]", "3");
+
+        // Assert: the list was created and holds both pairs in insertion order.
+        Assert.NotNull(input.TestCases);
+        Assert.Equal(2, input.TestCases.Count);
+        Assert.Equal(("[10]", "10"), input.TestCases[0]);
+        Assert.Equal(("[1, 1, 1]", "3"), input.TestCases[1]);
+    }
+
+    [Fact]
+    public void AddConstraint_AddsConstraintCorrectly()
+    {
+        // Arrange
+        var input = new ProgramInput();
+
+        // Act
+        input.AddConstraint("Must be fast");
+        input.AddConstraint("Should be readable");
+
+        // Assert: both constraint strings are present.
+        Assert.NotNull(input.Constraints);
+        Assert.Equal(2, input.Constraints.Count);
+        Assert.Contains("Must be fast", input.Constraints);
+        Assert.Contains("Should be readable", input.Constraints);
+    }
+
+    [Fact]
+    public void Properties_SettersAndGetters_WorkCorrectly()
+    {
+        // Arrange
+        var input = new ProgramInput();
+
+        // Act: assign each scalar property after construction.
+        input.Description = "Generate a sorting function";
+        input.TargetLanguage = ProgramLanguage.Java;
+        input.FormalSpecification = "∀x∀y: x < y ⇒ sorted[x] ≤ sorted[y]";
+        input.MaxComplexity = 50;
+        input.TimeoutMs = 5000;
+
+        // Assert: each getter returns exactly what was assigned.
+        Assert.Equal("Generate a sorting function", input.Description);
+        Assert.Equal(ProgramLanguage.Java, input.TargetLanguage);
+        Assert.Equal("∀x∀y: x < y ⇒ sorted[x] ≤ sorted[y]", input.FormalSpecification);
+        Assert.Equal(50, input.MaxComplexity);
+        Assert.Equal(5000, input.TimeoutMs);
+    }
+
+    [Fact]
+    public void AddExample_MultipleTimesSeparately_MaintainsOrder()
+    {
+        // Arrange
+        var input = new ProgramInput();
+
+        // Act: three separate calls.
+        input.AddExample("input1", "output1");
+        input.AddExample("input2", "output2");
+        input.AddExample("input3", "output3");
+
+        // Assert: positions match the call order (Item1 = input, Item2 = expected output).
+        Assert.Equal(3, input.Examples?.Count);
+        Assert.Equal("input1", input.Examples?[0].Item1);
+        Assert.Equal("output2", input.Examples?[1].Item2);
+        Assert.Equal("input3", input.Examples?[2].Item1);
+    }
+}
diff --git a/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/ProgramTests.cs b/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/ProgramTests.cs
new file mode 100644
index 000000000..a6900fded
--- /dev/null
+++ b/tests/AiDotNet.Tests/UnitTests/ProgramSynthesis/ProgramTests.cs
@@ -0,0 +1,105 @@
+using AiDotNet.ProgramSynthesis.Enums;
+using AiDotNet.ProgramSynthesis.Models;
+using Xunit;
+
+namespace AiDotNetTests.UnitTests.ProgramSynthesis;
+
+/// <summary>
+/// Verifies the Program class: both constructors, property round-trips, ToString and ErrorMessage.
+/// </summary>
+public class ProgramTests
+{
+    [Fact]
+    public void Constructor_ValidParameters_CreatesInstance()
+    {
+        // Expected identity of the program under test.
+        const string expectedSource = "def add(a, b):\n return a + b";
+        const ProgramLanguage expectedLanguage = ProgramLanguage.Python;
+
+        // Construct with every optional argument supplied by name.
+        var sut = new Program(expectedSource, expectedLanguage, isValid: true, fitnessScore: 1.0, complexity: 2);
+
+        // Each argument must be readable back from its property.
+        Assert.NotNull(sut);
+        Assert.Equal(expectedSource, sut.SourceCode);
+        Assert.Equal(expectedLanguage, sut.Language);
+        Assert.True(sut.IsValid);
+        Assert.Equal(1.0, sut.FitnessScore);
+        Assert.Equal(2, sut.Complexity);
+    }
+
+    [Fact]
+    public void Constructor_DefaultConstructor_CreatesEmptyProgram()
+    {
+        // No arguments at all.
+        var sut = new Program();
+
+        // A blank program: empty source, Generic language, invalid, zero scores.
+        Assert.NotNull(sut);
+        Assert.Empty(sut.SourceCode);
+        Assert.Equal(ProgramLanguage.Generic, sut.Language);
+        Assert.False(sut.IsValid);
+        Assert.Equal(0.0, sut.FitnessScore);
+        Assert.Equal(0, sut.Complexity);
+    }
+
+    [Fact]
+    public void Properties_SettersAndGetters_WorkCorrectly()
+    {
+        // Start from the blank program and assign every settable property, in the listed order.
+        var sut = new Program
+        {
+            SourceCode = "print('Hello, World!')",
+            Language = ProgramLanguage.Python,
+            IsValid = true,
+            FitnessScore = 0.95,
+            Complexity = 1,
+            ErrorMessage = null,
+            ExecutionTimeMs = 5.5
+        };
+
+        // Every getter must return what was assigned.
+        Assert.Equal("print('Hello, World!')", sut.SourceCode);
+        Assert.Equal(ProgramLanguage.Python, sut.Language);
+        Assert.True(sut.IsValid);
+        Assert.Equal(0.95, sut.FitnessScore);
+        Assert.Equal(1, sut.Complexity);
+        Assert.Null(sut.ErrorMessage);
+        Assert.Equal(5.5, sut.ExecutionTimeMs);
+    }
+
+    [Fact]
+    public void ToString_ReturnsFormattedString()
+    {
+        // A small valid Python program with known metadata.
+        var sut = new Program(
+            "x = 5",
+            ProgramLanguage.Python,
+            isValid: true,
+            fitnessScore: 0.75,
+            complexity: 1);
+
+        // Render it.
+        var text = sut.ToString();
+
+        // The rendering must mention every piece of metadata and the code itself.
+        Assert.Contains("[Python]", text);
+        Assert.Contains("Valid: True", text);
+        Assert.Contains("Fitness: 0.75", text);
+        Assert.Contains("Complexity: 1", text);
+        Assert.Contains("x = 5", text);
+    }
+
+    [Fact]
+    public void ErrorMessage_WhenSet_StoresCorrectly()
+    {
+        // A program flagged as invalid at construction time.
+        var sut = new Program("invalid code", ProgramLanguage.Python, isValid: false);
+
+        // Attach the diagnostic text.
+        sut.ErrorMessage = "Syntax error on line 1";
+
+        // The same text must be returned.
+        Assert.Equal("Syntax error on line 1", sut.ErrorMessage);
+    }
+}