Skip to content

Commit bde077d

Browse files
committed
wip improvement of metrics system
1 parent 4eeda07 commit bde077d

File tree

10 files changed

+680
-1
lines changed

10 files changed

+680
-1
lines changed

Project.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
99
InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
1010
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
1111
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
12+
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1213
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
14+
ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"
1315

1416
[compat]
1517
DecisionFocusedLearningBenchmarks = "0.3.0"
1618
Flux = "0.16.5"
1719
InferOpt = "0.7.1"
1820
MLUtils = "0.4.8"
1921
ProgressMeter = "1.11.0"
22+
Statistics = "1.11.1"
2023
UnicodePlots = "3.8.1"
24+
ValueHistories = "0.5.4"
2125
julia = "1.11"
2226

2327
[extras]

examples/consistent_signature.jl

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# Consistent Metric Function Signature
2+
3+
using DecisionFocusedLearningAlgorithms
4+
using DecisionFocusedLearningBenchmarks
5+
using MLUtils: splitobs
6+
using Statistics
7+
8+
b = ArgmaxBenchmark()
9+
dataset = generate_dataset(b, 100)
10+
train_instances, val_instances, test_instances = splitobs(dataset; at=(0.3, 0.3, 0.4))
11+
12+
model = generate_statistical_model(b; seed=0)
13+
maximizer = generate_maximizer(b)
14+
15+
# ============================================================================
16+
# NEW: ALL metric functions have the SAME signature!
17+
# (model, maximizer, data, context) -> value
18+
# ============================================================================
19+
20+
# Simple metric - just uses model, maximizer, and data
21+
compute_gap = (model, max, data, ctx) -> DecisionFocusedLearningBenchmarks.compute_gap(b, data, model, max)  # qualified: a bare `compute_gap` here would resolve to this very binding and recurse
22+
23+
# Metric that also uses context
24+
compute_gap_ratio =
25+
(model, max, data, ctx) -> begin
26+
# data is the dataset from 'on' parameter
27+
# context gives access to everything else
28+
train_gap = DecisionFocusedLearningBenchmarks.compute_gap(b, ctx.train_dataset, model, max)
29+
data_gap = DecisionFocusedLearningBenchmarks.compute_gap(b, data, model, max)
30+
return train_gap / data_gap
31+
end
32+
33+
# Metric that ignores data, just uses context
34+
get_epoch = (model, max, data, ctx) -> ctx.epoch
35+
36+
# Metric that uses everything
37+
complex_metric = (model, max, data, ctx) -> begin
38+
# Can access:
39+
# - model, max (always provided)
40+
# - data (the dataset from 'on')
41+
# - ctx.epoch
42+
# - ctx.train_dataset, ctx.validation_dataset
43+
# - ctx.training_loss, ctx.validation_loss
44+
gap = DecisionFocusedLearningBenchmarks.compute_gap(b, data, model, max)
45+
return gap * ctx.epoch # silly example, but shows flexibility
46+
end
47+
48+
# ============================================================================
49+
# Usage - Same function signature works everywhere!
50+
# ============================================================================
51+
52+
callbacks = [
53+
# on=:validation (default) - data will be validation_dataset
54+
Metric(:gap, compute_gap),
55+
# Creates: val_gap
56+
57+
# on=:both - function called twice with train and val datasets
58+
Metric(:gap, compute_gap; on=:both),
59+
# Creates: train_gap, val_gap
60+
61+
# on=test_instances - data will be test_instances
62+
Metric(:test_gap, compute_gap; on=test_instances),
63+
# Creates: test_gap
64+
65+
# Complex metric using context
66+
Metric(:gap_ratio, compute_gap_ratio; on=:validation),
67+
# Creates: val_gap_ratio
68+
69+
# Ignore data parameter completely
70+
Metric(:current_epoch, get_epoch),
71+
# Creates: val_current_epoch (on=:validation by default)
72+
]
73+
74+
# ============================================================================
75+
# Benefits of Consistent Signature
76+
# ============================================================================
77+
78+
# ✅ ALWAYS the same signature: (model, max, data, ctx) -> value
79+
# ✅ No confusion about what arguments metric_fn receives
80+
# ✅ Easy to write - just follow one pattern
81+
# ✅ Easy to compose - all functions compatible
82+
# ✅ Full flexibility - context gives access to everything
83+
# ✅ Can ignore unused parameters (data or parts of context)
84+
85+
# ============================================================================
86+
# Comparison: OLD vs NEW
87+
# ============================================================================
88+
89+
# OLD (inconsistent signatures):
90+
# on=nothing → metric_fn(context) # 1 arg
91+
# on=:both → metric_fn(model, maximizer, dataset) # 3 args
92+
# on=data → metric_fn(model, maximizer, data) # 3 args
93+
# 😕 Confusing! Different signatures for different modes!
94+
95+
# NEW (consistent signature):
96+
# Always: metric_fn(model, maximizer, data, context) # 4 args
97+
# ✨ Clear! Same signature everywhere!
98+
99+
# ============================================================================
100+
# Practical Example: Define metrics once, use everywhere
101+
# ============================================================================
102+
103+
# Define your metrics library with consistent signature
104+
module MyMetrics
105+
gap(model, max, data, ctx) = compute_gap(benchmark, data, model, max)
106+
regret(model, max, data, ctx) = compute_regret(benchmark, data, model, max)
107+
accuracy(model, max, data, ctx) = compute_accuracy(benchmark, data, model, max)
108+
109+
# Complex metric using context
110+
function overfitting_indicator(model, max, data, ctx)
111+
train_metric = gap(model, max, ctx.train_dataset, ctx)
112+
val_metric = gap(model, max, ctx.validation_dataset, ctx)
113+
return val_metric - train_metric
114+
end
115+
end
116+
117+
# Use them easily
118+
callbacks = [
119+
Metric(:gap, MyMetrics.gap; on=:both),
120+
Metric(:regret, MyMetrics.regret; on=:both),
121+
Metric(:test_accuracy, MyMetrics.accuracy; on=test_instances),
122+
Metric(:overfitting, MyMetrics.overfitting_indicator),
123+
]
124+
125+
# ============================================================================
126+
# Advanced: Higher-order functions
127+
# ============================================================================
128+
129+
# Create a metric factory that returns properly-signed functions
130+
function dataset_metric(benchmark, compute_fn)
131+
return (model, max, data, ctx) -> compute_fn(benchmark, data, model, max)
132+
end
133+
134+
# Use it
135+
callbacks = [
136+
Metric(:gap, dataset_metric(b, compute_gap); on=:both),
137+
Metric(:regret, dataset_metric(b, compute_regret); on=:both),
138+
]
139+
140+
# ============================================================================
141+
# Migration Helper
142+
# ============================================================================
143+
144+
# If you have old-style functions: (model, max, data) -> value
145+
# Wrap them easily:
146+
old_compute_gap = (model, max, data) -> DecisionFocusedLearningBenchmarks.compute_gap(b, data, model, max)
147+
148+
# Convert to new signature:
149+
new_compute_gap = (model, max, data, ctx) -> old_compute_gap(model, max, data)
150+
# Or more concisely:
151+
new_compute_gap = (model, max, data, _) -> old_compute_gap(model, max, data)
152+
153+
Metric(:gap, new_compute_gap; on=:both)

examples/two_argument_signature.jl

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Simplified Metric Signature - Just (data, context)!
2+
3+
using DecisionFocusedLearningAlgorithms
4+
using DecisionFocusedLearningBenchmarks
5+
using MLUtils: splitobs
6+
7+
b = ArgmaxBenchmark()
8+
dataset = generate_dataset(b, 100)
9+
train, val, test = splitobs(dataset; at=(0.3, 0.3, 0.4))
10+
model = generate_statistical_model(b)
11+
maximizer = generate_maximizer(b)
12+
13+
# ============================================================================
14+
# NEW: Metric functions take just 2 arguments: (data, context)
15+
# Everything you need is in context!
16+
# ============================================================================
17+
18+
# Simple metric - model and maximizer from context
19+
compute_gapp = (data, ctx) -> DecisionFocusedLearningBenchmarks.compute_gap(b, data, ctx.model, ctx.maximizer)
20+
21+
# Complex metric - access other datasets from context
22+
compute_ratio =
23+
(data, ctx) -> begin
24+
train_gap = compute_gap(b, ctx.train_dataset, ctx.model, ctx.maximizer)
25+
val_gap = compute_gap(b, data, ctx.model, ctx.maximizer)
26+
return train_gap / val_gap
27+
end
28+
29+
# Context-only metrics - ignore data completely
30+
get_epoch = (_, ctx) -> ctx.epoch
31+
32+
# ============================================================================
33+
# Usage Examples
34+
# ============================================================================
35+
36+
callbacks = [
37+
# Default: on=:validation
38+
Metric(:gap, compute_gapp),
39+
# Creates: val_gap
40+
41+
# Automatic train and validation
42+
Metric(:gap, compute_gapp; on=:both),
43+
# Creates: train_gap, val_gap
44+
45+
# Specific test set
46+
Metric(:test_gap, compute_gapp; on=test),
47+
# Creates: test_gap
48+
49+
# Complex metric using context
50+
Metric(:gap_ratio, compute_ratio),
51+
# Creates: val_gap_ratio
52+
53+
# Context-only metrics
54+
Metric(:current_epoch, get_epoch),
55+
]
56+
57+
# Note: training_loss and validation_loss are automatically tracked in history!
58+
# Access them with: get(history, :training_loss), get(history, :validation_loss)
59+
60+
history = fyl_train_model!(model, maximizer, train, val; epochs=100, callbacks=callbacks)
61+
62+
# ============================================================================
63+
# Why This is Better
64+
# ============================================================================
65+
66+
# BEFORE: Redundant parameters (4 arguments)
67+
# metric_fn(model, maximizer, data, context)
68+
# - model and maximizer are ALSO in context (redundant!)
69+
# - Longer signature
70+
# - More typing
71+
72+
# AFTER: Clean and minimal (2 arguments)
73+
# metric_fn(data, context)
74+
# - Get model from ctx.model
75+
# - Get maximizer from ctx.maximizer
76+
# - Everything in one place (context)
77+
# - Shorter, cleaner
78+
79+
# ============================================================================
80+
# Real-World Example
81+
# ============================================================================
82+
83+
# Define your metric functions
84+
compute_gap = (data, ctx) -> DecisionFocusedLearningBenchmarks.compute_gap(b, data, ctx.model, ctx.maximizer)  # `b`, not undefined `benchmark`; inner call qualified to avoid self-shadowing
85+
compute_regret = (data, ctx) -> DecisionFocusedLearningBenchmarks.compute_regret(b, data, ctx.model, ctx.maximizer)  # `b`, not undefined `benchmark`; inner call qualified to avoid self-shadowing
86+
87+
# Metric that uses multiple datasets
88+
overfitting_indicator =
89+
(data, ctx) -> begin
90+
train_metric = DecisionFocusedLearningBenchmarks.compute_gap(b, ctx.train_dataset, ctx.model, ctx.maximizer)
91+
val_metric = DecisionFocusedLearningBenchmarks.compute_gap(b, ctx.validation_dataset, ctx.model, ctx.maximizer)
92+
return val_metric - train_metric
93+
end
94+
95+
# Metric that evaluates policy on environments
96+
eval_policy = (envs, ctx) -> begin
97+
policy = Policy("", "", PolicyWrapper(ctx.model))
98+
rewards, _ = evaluate_policy!(policy, envs, 100)
99+
return mean(rewards)
100+
end
101+
102+
test_envs = generate_environments(b, test)
103+
104+
callbacks = [
105+
Metric(:gap, compute_gap; on=:both),
106+
Metric(:regret, compute_regret; on=:both),
107+
Metric(:test_gap, compute_gap; on=test),
108+
Metric(:overfitting, overfitting_indicator),
109+
Metric(:test_reward, eval_policy; on=test_envs),
110+
]
111+
112+
# ============================================================================
113+
# Metric Library Pattern
114+
# ============================================================================
115+
116+
# Create a module with all your metrics
117+
module MyMetrics
118+
gap(data, ctx) = compute_gap(benchmark, data, ctx.model, ctx.maximizer)
119+
regret(data, ctx) = compute_regret(benchmark, data, ctx.model, ctx.maximizer)
120+
121+
# More complex metrics
122+
overfitting(data, ctx) = begin
123+
train = gap(ctx.train_dataset, ctx)
124+
val = gap(ctx.validation_dataset, ctx)
125+
return val - train
126+
end
127+
end
128+
129+
# Use them
130+
callbacks = [
131+
Metric(:gap, MyMetrics.gap; on=:both),
132+
Metric(:regret, MyMetrics.regret; on=:both),
133+
Metric(:overfitting, MyMetrics.overfitting),
134+
]
135+
136+
# ============================================================================
137+
# Migration from 4-argument signature
138+
# ============================================================================
139+
140+
# If you have old 4-argument functions:
141+
old_metric = (model, max, data, ctx) -> compute_gap(b, data, model, max)
142+
143+
# Convert to new 2-argument:
144+
new_metric = (data, ctx) -> compute_gap(b, data, ctx.model, ctx.maximizer)
145+
146+
# Or just update inline:
147+
Metric(:gap, (data, ctx) -> compute_gap(b, data, ctx.model, ctx.maximizer); on=:both)
148+
149+
# ============================================================================
150+
# Benefits Summary
151+
# ============================================================================
152+
153+
# ✅ Cleaner: 2 arguments instead of 4
154+
# ✅ Less redundancy: No duplicate model/maximizer
155+
# ✅ Consistent: Everything from context
156+
# ✅ Simpler: Less to type and remember
157+
# ✅ Flexible: Context has everything you need

examples/using_mvhistory.jl

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Using MVHistory for Metrics Storage
2+
3+
using DecisionFocusedLearningAlgorithms
4+
using DecisionFocusedLearningBenchmarks
5+
using MLUtils: splitobs
6+
using ValueHistories
7+
using Plots
8+
9+
b = ArgmaxBenchmark()
10+
dataset = generate_dataset(b, 100)
11+
train_instances, val_instances, test_instances = splitobs(dataset; at=(0.3, 0.3, 0.4))
12+
13+
model = generate_statistical_model(b; seed=0)
14+
maximizer = generate_maximizer(b)
15+
16+
compute_gap_fn = (m, max, data) -> compute_gap(b, data, m, max)
17+
18+
# Define callbacks
19+
callbacks = [
20+
Metric(:gap, compute_gap_fn; on=:both),
21+
Metric(:test_gap, compute_gap_fn; on=test_instances),
22+
]
23+
24+
# Train and get MVHistory back
25+
history = fyl_train_model!(
26+
model, maximizer, train_instances, val_instances; epochs=100, callbacks=callbacks
27+
)
28+
29+
# ============================================================================
30+
# Working with MVHistory - Much Cleaner!
31+
# ============================================================================
32+
33+
# Get values and iterations
34+
epochs, train_losses = get(history, :training_loss)
35+
epochs, val_losses = get(history, :validation_loss)
36+
epochs, train_gaps = get(history, :train_gap)
37+
epochs, val_gaps = get(history, :val_gap)
38+
test_epochs, test_gaps = get(history, :test_gap)
39+
40+
# Plot multiple metrics
41+
plot(epochs, train_losses; label="Train Loss")
42+
plot!(epochs, val_losses; label="Val Loss")
43+
44+
plot(epochs, train_gaps; label="Train Gap")
45+
plot!(epochs, val_gaps; label="Val Gap")
46+
plot!(test_epochs, test_gaps; label="Test Gap")
47+
48+
using JLD2
49+
@save "training_history.jld2" history
50+
@load "training_history.jld2" history

scripts/Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
[deps]
2+
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
23
DecisionFocusedLearningAlgorithms = "46d52364-bc3b-4fac-a992-eb1d3ef2de15"
34
DecisionFocusedLearningBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20"
5+
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
46
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
57
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
68
TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"
9+
ValueHistories = "98cad3c8-aec3-5f06-8e41-884608649ab7"

0 commit comments

Comments
 (0)