112 changes: 112 additions & 0 deletions src/MetaLearning/Algorithms/IMetaLearningAlgorithm.cs
@@ -0,0 +1,112 @@
using AiDotNet.Interfaces;
using AiDotNet.MetaLearning.Data;
using AiDotNet.Models;

namespace AiDotNet.MetaLearning.Algorithms;

/// <summary>
/// Represents a meta-learning algorithm that can learn from multiple tasks.
/// </summary>
/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
/// <typeparam name="TInput">The input data type (e.g., Matrix<T>, Tensor<T>).</typeparam>
/// <typeparam name="TOutput">The output data type (e.g., Vector<T>, Tensor<T>).</typeparam>
/// <remarks>
/// <para>
/// <b>For Beginners:</b> Meta-learning is "learning to learn" - the algorithm practices
/// adapting to new tasks quickly by training on many different tasks.
///
/// Think of it like learning to learn languages:
/// - Instead of just learning one language, you learn many languages
/// - Over time, you get better at picking up new languages quickly
/// - When you encounter a new language, you can learn it faster than the first time
///
/// Similarly, a meta-learning algorithm:
/// - Trains on many different tasks
/// - Learns patterns that help it adapt quickly to new tasks
/// - Can solve new tasks with just a few examples (few-shot learning)
/// </para>
/// </remarks>
public interface IMetaLearningAlgorithm<T, TInput, TOutput>
{
/// <summary>
/// Performs one meta-training step on a batch of tasks.
/// </summary>
/// <param name="taskBatch">The batch of tasks to train on.</param>
/// <returns>The meta-training loss for this batch.</returns>
/// <remarks>
/// <para>
/// <b>For Beginners:</b> This method updates the model by training on multiple tasks at once.
/// Each task teaches the model something about how to learn quickly. The returned loss value
/// indicates how well the model is doing - lower is better.
/// </para>
/// </remarks>
T MetaTrain(TaskBatch<T, TInput, TOutput> taskBatch);

/// <summary>
/// Adapts the model to a new task using its support set.
/// </summary>
/// <param name="task">The task to adapt to.</param>
/// <returns>A new model instance adapted to the task.</returns>
/// <remarks>
/// <para>
/// <b>For Beginners:</b> This is where the "quick learning" happens. Given a new task
/// with just a few examples (the support set), this method creates a new model that's
/// specialized for that specific task. This is what makes meta-learning powerful -
/// it can adapt to new tasks with very few examples.
/// </para>
/// </remarks>
IModel<TInput, TOutput, ModelMetadata<T>> Adapt(ITask<T, TInput, TOutput> task);

/// <summary>
/// Evaluates the meta-learning algorithm on a batch of tasks.
/// </summary>
/// <param name="taskBatch">The batch of tasks to evaluate on.</param>
/// <returns>The average evaluation loss across all tasks.</returns>
/// <remarks>
/// <para>
/// <b>For Beginners:</b> This checks how well the meta-learning algorithm performs.
/// For each task, it adapts using the support set and then tests on the query set.
/// The returned value is the average loss across all tasks - lower means better performance.
/// </para>
/// </remarks>
T Evaluate(TaskBatch<T, TInput, TOutput> taskBatch);

/// <summary>
/// Gets the base model used by this meta-learning algorithm.
/// </summary>
/// <returns>The base model.</returns>
/// <remarks>
/// <para>
/// <b>For Beginners:</b> This returns the "meta-learned" model that has been trained
/// on many tasks. This model itself may not be very good at any specific task, but it's
/// excellent as a starting point for quickly adapting to new tasks.
/// </para>
/// </remarks>
IFullModel<T, TInput, TOutput> GetMetaModel();

/// <summary>
/// Sets the base model for this meta-learning algorithm.
/// </summary>
/// <param name="model">The model to use as the base.</param>
void SetMetaModel(IFullModel<T, TInput, TOutput> model);

/// <summary>
/// Gets the name of the meta-learning algorithm.
/// </summary>
string AlgorithmName { get; }

/// <summary>
/// Gets the number of adaptation steps to perform during task adaptation.
/// </summary>
int AdaptationSteps { get; }

/// <summary>
/// Gets the learning rate used for task adaptation (inner loop).
/// </summary>
double InnerLearningRate { get; }

/// <summary>
/// Gets the learning rate used for meta-learning (outer loop).
/// </summary>
double OuterLearningRate { get; }
}
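A short usage sketch makes the contract above concrete. The sampler delegate, episode count, and unseen task below are illustrative assumptions rather than APIs introduced by this PR; only the interface members themselves come from this file:

```csharp
using System;
using AiDotNet.Interfaces;
using AiDotNet.MetaLearning.Data;
using AiDotNet.Models;

public static class MetaLearningUsageSketch
{
    // Generic over the same T/TInput/TOutput as the interface, so no concrete
    // Matrix/Vector types need to be referenced here.
    public static IModel<TInput, TOutput, ModelMetadata<T>> RunEpisodicTraining<T, TInput, TOutput>(
        IMetaLearningAlgorithm<T, TInput, TOutput> metaLearner,
        Func<TaskBatch<T, TInput, TOutput>> sampleTaskBatch,
        ITask<T, TInput, TOutput> unseenTask,
        int episodes = 1000)
    {
        for (int episode = 0; episode < episodes; episode++)
        {
            // One outer-loop update across a batch of few-shot tasks;
            // the returned meta-loss should trend downward over episodes.
            var batch = sampleTaskBatch();
            T metaLoss = metaLearner.MetaTrain(batch);
        }

        // Adapt-and-test on held-out tasks to measure few-shot performance.
        T evalLoss = metaLearner.Evaluate(sampleTaskBatch());

        // Finally, specialize the meta-learned model to one unseen task
        // using only its small support set.
        return metaLearner.Adapt(unseenTask);
    }
}
```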
184 changes: 184 additions & 0 deletions src/MetaLearning/Algorithms/MAMLAlgorithm.cs
@@ -0,0 +1,184 @@
using AiDotNet.Interfaces;
using AiDotNet.MetaLearning.Data;
using AiDotNet.Models;
using AiDotNet.Models.Options;

namespace AiDotNet.MetaLearning.Algorithms;

/// <summary>
/// Implementation of the MAML (Model-Agnostic Meta-Learning) algorithm.
/// </summary>
/// <typeparam name="T">The numeric type used for calculations (e.g., double, float).</typeparam>
/// <typeparam name="TInput">The input data type (e.g., Matrix<T>, Tensor<T>).</typeparam>
/// <typeparam name="TOutput">The output data type (e.g., Vector<T>, Tensor<T>).</typeparam>
/// <remarks>
/// <para>
/// MAML (Model-Agnostic Meta-Learning) is a meta-learning algorithm that trains models
/// to be easily fine-tunable. It learns initial parameters such that a small number of
/// gradient steps on a new task will lead to good performance.
/// </para>
/// <para>
/// Key features:
/// - Model-agnostic: works with any model trainable with gradient descent
/// - Learns good initialization rather than learning a fixed feature extractor
/// - Enables few-shot learning with just 1-5 examples per class
/// </para>
/// <para>
/// <b>For Beginners:</b> MAML is like teaching someone how to learn quickly.
///
/// Normal machine learning: Train a model for one specific task
/// MAML: Train a model to be easily trainable for many different tasks
///
/// It's like learning how to learn - by practicing on many tasks, the model
/// learns what kind of parameters make it easy to adapt to new tasks quickly.
/// </para>
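/// <para>
/// In symbols (a compact restatement of the update rule from the reference below, using the
/// property names exposed by this library): each inner-loop step computes θᵢ' = θ − α·∇θ Lᵢ(θ)
/// on task i's support set, repeated AdaptationSteps times with α = InnerLearningRate; the
/// outer loop then updates the shared initialization, θ ← θ − β·∇θ Σᵢ Lᵢ(θᵢ'), using the
/// query sets with β = OuterLearningRate.
/// </para>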
/// <para>
/// Reference: Finn, C., Abbeel, P., and Levine, S. (2017).
/// Model-agnostic meta-learning for fast adaptation of deep networks.
/// </para>
/// </remarks>
public class MAMLAlgorithm<T, TInput, TOutput> : MetaLearningBase<T, TInput, TOutput>
{
private readonly MAMLAlgorithmOptions<T, TInput, TOutput> _mamlOptions;

/// <summary>
/// Initializes a new instance of the MAMLAlgorithm class.
/// </summary>
/// <param name="options">The configuration options for MAML.</param>
public MAMLAlgorithm(MAMLAlgorithmOptions<T, TInput, TOutput> options) : base(options)
{
_mamlOptions = options;
}

/// <inheritdoc/>
public override string AlgorithmName => "MAML";

/// <inheritdoc/>
public override T MetaTrain(TaskBatch<T, TInput, TOutput> taskBatch)
{
if (taskBatch == null || taskBatch.BatchSize == 0)
{
throw new ArgumentException("Task batch cannot be null or empty.", nameof(taskBatch));
}

// Accumulate meta-gradients across all tasks
Vector<T>? metaGradients = null;
T totalMetaLoss = NumOps.Zero;

foreach (var task in taskBatch.Tasks)
{
// Clone the meta model for this task
var taskModel = CloneModel();
var initialParams = taskModel.GetParameters();

// Inner loop: Adapt to the task using support set
var adaptedParams = InnerLoopAdaptation(taskModel, task);
taskModel.UpdateParameters(adaptedParams);

Check failure on line 76 in src/MetaLearning/Algorithms/MAMLAlgorithm.cs (GitHub Actions / Publish Size Analysis): 'IFullModel<T, TInput, TOutput>' does not contain a definition for 'UpdateParameters' and no accessible extension method 'UpdateParameters' accepting a first argument of type 'IFullModel<T, TInput, TOutput>' could be found (are you missing a using directive or an assembly reference?)

// Compute meta-loss on query set
var queryPredictions = taskModel.Predict(task.QueryInput);
T metaLoss = LossFunction.ComputeLoss(queryPredictions, task.QueryOutput);
totalMetaLoss = NumOps.Add(totalMetaLoss, metaLoss);

// Compute meta-gradients (gradients with respect to initial parameters)
var taskMetaGradients = ComputeMetaGradients(initialParams, task);
Comment on lines +70 to +84
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Verify redundant adaptation computation in ComputeMetaGradients.

The inner-loop adaptation is performed at line 75, but ComputeMetaGradients (lines 169-183) clones a fresh model with initialParams and re-runs the adaptation from scratch (line 176). This means each task is adapted twice, which is computationally wasteful.

The typical MAML flow should be:

  1. Clone model → adapt → compute query loss (for meta-loss tracking)
  2. Separately: Clone model → adapt → compute gradients on query set

However, the current implementation adapts twice per task. Verify if this is intentional or if ComputeMetaGradients should receive the already-adapted model instead of re-adapting from initialParams.

If this is a bug, consider refactoring to avoid redundant computation. If intentional for gradient computation correctness, add a comment explaining why.



---

🏁 Script executed:

```shell
#!/bin/bash
# Retrieve the MAMLAlgorithm.cs file to examine MetaTrain and ComputeMetaGradients
cat src/MetaLearning/Algorithms/MAMLAlgorithm.cs
```

Length of output: 6944


Refactor ComputeMetaGradients to eliminate redundant adaptation.

The concern is verified: adaptation happens twice per task. At line 75, InnerLoopAdaptation produces adaptedParams which is used only for computing meta-loss (lines 79-80). Then at line 84, ComputeMetaGradients is called with initialParams, which internally clones the model and re-adapts from scratch (line 179), performing the same K gradient steps again.

This is computationally wasteful. Instead of recomputing adaptation, ComputeMetaGradients should either:

  • Accept pre-computed adaptedParams directly, or
  • Accept the already-adapted model state after line 76

Refactor to pass the adapted state into ComputeMetaGradients rather than regenerating it from initialParams.

🤖 Prompt for AI Agents
In src/MetaLearning/Algorithms/MAMLAlgorithm.cs around lines 70-84, the
inner-loop adaptation is being performed twice: once via InnerLoopAdaptation at
line ~75 to produce adaptedParams and again inside ComputeMetaGradients which
reclones and re-adapts the model; change ComputeMetaGradients to accept the
already computed adapted state (either adaptedParams or the adapted model
instance) along with initialParams so the method uses that adapted state instead
of re-running adaptation, update this call site to pass adaptedParams (or
taskModel) and remove the redundant adaptation logic inside ComputeMetaGradients
(ensure gradients are still computed with respect to initialParams by
backpropagating through the provided adapted state).
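
A minimal sketch of that refactor, reusing the helpers already defined in this file (CloneModel, ComputeGradients) and keeping the first-order approximation the current code effectively makes — the query-set gradient is taken at the adapted parameters rather than backpropagated through the adaptation steps. Note that UpdateParameters simply mirrors the call pattern already used in this file, which the CI annotation above flags as not yet existing on IFullModel:

```csharp
// Hypothetical refactor sketch (not the committed code): reuse the parameters produced by
// InnerLoopAdaptation in MetaTrain instead of re-running the inner loop here.
private Vector<T> ComputeMetaGradients(Vector<T> adaptedParams, ITask<T, TInput, TOutput> task)
{
    // Load the already-adapted parameters into a fresh clone of the meta model.
    var model = CloneModel();
    model.UpdateParameters(adaptedParams);

    // First-order meta-gradient: the query-set gradient evaluated at the adapted
    // parameters, used as an approximation of the gradient with respect to the
    // initial parameters.
    return ComputeGradients(model, task.QueryInput, task.QueryOutput);
}

// The call site in MetaTrain would then become:
// var taskMetaGradients = ComputeMetaGradients(adaptedParams, task);
```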


// Accumulate meta-gradients
if (metaGradients == null)
{
metaGradients = taskMetaGradients;
}
else
{
for (int i = 0; i < metaGradients.Length; i++)
{
metaGradients[i] = NumOps.Add(metaGradients[i], taskMetaGradients[i]);
}
}
}

if (metaGradients == null)
{
throw new InvalidOperationException("Failed to compute meta-gradients.");
}

// Average the meta-gradients
T batchSize = NumOps.FromDouble(taskBatch.BatchSize);
for (int i = 0; i < metaGradients.Length; i++)
{
metaGradients[i] = NumOps.Divide(metaGradients[i], batchSize);
}

// Outer loop: Update meta-parameters
var currentMetaParams = MetaModel.GetParameters();
var updatedMetaParams = ApplyGradients(currentMetaParams, metaGradients, Options.OuterLearningRate);
MetaModel.UpdateParameters(updatedMetaParams);

// Return average meta-loss
return NumOps.Divide(totalMetaLoss, batchSize);
}

/// <inheritdoc/>
public override IModel<TInput, TOutput, ModelMetadata<T>> Adapt(ITask<T, TInput, TOutput> task)
{
if (task == null)
{
throw new ArgumentNullException(nameof(task));
}

// Clone the meta model
var adaptedModel = CloneModel();

// Perform inner loop adaptation
var adaptedParameters = InnerLoopAdaptation(adaptedModel, task);
adaptedModel.UpdateParameters(adaptedParameters);

return adaptedModel;
}

/// <summary>
/// Performs the inner loop adaptation to a specific task.
/// </summary>
/// <param name="model">The model to adapt.</param>
/// <param name="task">The task to adapt to.</param>
/// <returns>The adapted parameters.</returns>
private Vector<T> InnerLoopAdaptation(IFullModel<T, TInput, TOutput> model, ITask<T, TInput, TOutput> task)
{
var parameters = model.GetParameters();

// Perform K gradient steps on the support set
for (int step = 0; step < Options.AdaptationSteps; step++)
{
// Compute gradients on support set
var gradients = ComputeGradients(model, task.SupportInput, task.SupportOutput);

// Apply gradients with inner learning rate
parameters = ApplyGradients(parameters, gradients, Options.InnerLearningRate);
model.UpdateParameters(parameters);
}

return parameters;
}

/// <summary>
/// Computes meta-gradients for the outer loop update.
/// </summary>
/// <param name="initialParams">The initial parameters before adaptation.</param>
/// <param name="task">The task to compute meta-gradients for.</param>
/// <returns>The meta-gradient vector.</returns>
private Vector<T> ComputeMetaGradients(Vector<T> initialParams, ITask<T, TInput, TOutput> task)
{
// Clone meta model
var model = CloneModel();
model.UpdateParameters(initialParams);

// Adapt to the task
var adaptedParams = InnerLoopAdaptation(model, task);
model.UpdateParameters(adaptedParams);

// Compute gradients on query set (this gives us the meta-gradient)
var metaGradients = ComputeGradients(model, task.QueryInput, task.QueryOutput);

return metaGradients;
}
}
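
For completeness, a hypothetical wiring sketch. The settable option properties are inferred from the Options.AdaptationSteps / InnerLearningRate / OuterLearningRate reads in this file and the interface, and are not verified against MAMLAlgorithmOptions, so treat this as illustrative only:

```csharp
// Hypothetical construction sketch: 5 inner-loop steps per task, with a smaller
// outer (meta) learning rate than the inner one, as is typical for MAML.
static MAMLAlgorithm<T, TInput, TOutput> BuildMaml<T, TInput, TOutput>(
    IFullModel<T, TInput, TOutput> baseModel)
{
    var options = new MAMLAlgorithmOptions<T, TInput, TOutput>
    {
        AdaptationSteps = 5,       // K gradient steps in the inner loop per task
        InnerLearningRate = 0.01,  // alpha: per-task adaptation step size
        OuterLearningRate = 0.001  // beta: meta-update step size
    };

    var maml = new MAMLAlgorithm<T, TInput, TOutput>(options);
    maml.SetMetaModel(baseModel); // any gradient-trainable IFullModel implementation
    return maml;
}
```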