Skip to content

Commit 74cc3c4

Browse files
Add doc for CreateSweepableEstimator, Parameter and SearchSpace (#6611)
* add doc for createsweepableestimator, pipeline and searchspace * disable new experiment code * fix comment
1 parent 7a2ac5c commit 74cc3c4

File tree

11 files changed

+293
-0
lines changed

11 files changed

+293
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
using Microsoft.ML.SearchSpace;
5+
6+
namespace Microsoft.ML.AutoML.Samples
7+
{
8+
public static class ParameterExample
9+
{
10+
public static void Run()
11+
{
12+
// Parameter is essentially a wrapper class over Json.
13+
// Therefore it supports all JSON types, such as integer, number, boolean, string, etc.
14+
15+
// To create a parameter from an existing value, use the Parameter.From* methods (e.g. FromInt, FromDouble, FromBool).
16+
var intParam = Parameter.FromInt(10);
17+
var doubleParam = Parameter.FromDouble(20);
18+
var boolParam = Parameter.FromBool(false);
19+
20+
// To cast a parameter to a specific type, use Parameter.AsType<T>().
21+
// NOTE: Casting to the wrong type will throw an ArgumentException.
22+
var i = intParam.AsType<int>(); // i == 10
23+
var d = doubleParam.AsType<double>(); // d == 20
24+
var b = boolParam.AsType<bool>(); // b == false
25+
}
26+
}
27+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.ComponentModel;
4+
using System.Diagnostics;
5+
using System.Text;
6+
using System.Text.Json;
7+
using Microsoft.ML.SearchSpace;
8+
using Microsoft.ML.SearchSpace.Option;
9+
10+
namespace Microsoft.ML.AutoML.Samples
11+
{
12+
public static class SearchSpaceExample
13+
{
14+
public static void Run()
15+
{
16+
// The following code shows how to create a SearchSpace for MyParameter.
17+
var myParameterSearchSpace = new SearchSpace<MyParameter>();
18+
19+
// Equivalently, you can also create myParameterSearchSpace from scratch.
20+
var myParameterSearchSpace2 = new SearchSpace.SearchSpace();
21+
22+
// numeric options
23+
myParameterSearchSpace2["IntOption"] = new UniformIntOption(min: -10, max: 10, logBase: false, defaultValue: 0);
24+
myParameterSearchSpace2["SingleOption"] = new UniformSingleOption(min: 1, max: 10, logBase: true, defaultValue: 1);
25+
myParameterSearchSpace2["DoubleOption"] = new UniformDoubleOption(min: -10, max: 10, logBase: false, defaultValue: 0);
26+
27+
// choice options
28+
myParameterSearchSpace2["BoolOption"] = new ChoiceOption(true, false);
29+
myParameterSearchSpace2["StrOption"] = new ChoiceOption("a", "b", "c");
30+
31+
// nested options
32+
var nestedSearchSpace = new SearchSpace.SearchSpace();
33+
nestedSearchSpace["IntOption"] = new UniformIntOption(min: -10, max: 10, logBase: false, defaultValue: 0);
34+
myParameterSearchSpace2["Nest"] = nestedSearchSpace;
35+
36+
// the two search spaces should be equal (compared here via their hash codes)
37+
Debug.Assert(myParameterSearchSpace.GetHashCode() == myParameterSearchSpace2.GetHashCode());
38+
}
39+
40+
public class MyParameter
41+
{
42+
[Range((int)-10, 10, 0, false)]
43+
public int IntOption { get; set; }
44+
45+
[Range(1f, 10f, 1f, true)]
46+
public float SingleOption { get; set; }
47+
48+
[Range(-10, 10, false)]
49+
public double DoubleOption { get; set; }
50+
51+
[BooleanChoice]
52+
public bool BoolOption { get; set; }
53+
54+
[Choice("a", "b", "c")]
55+
public string StrOption { get; set; }
56+
57+
[NestOption]
58+
public NestParameter Nest { get; set; }
59+
}
60+
61+
public class NestParameter
62+
{
63+
[Range((int)-10, 10, 0, false)]
64+
public int IntOption { get; set; }
65+
}
66+
}
67+
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
using Microsoft.ML.Data;
7+
using Microsoft.ML.SearchSpace;
8+
9+
namespace Microsoft.ML.AutoML.Samples
10+
{
11+
public static class SweepableLightGBMBinaryExperiment
12+
{
13+
class LightGBMOption
14+
{
15+
[Range(4, 32768, init: 4, logBase: false)]
16+
public int NumberOfLeaves { get; set; } = 4;
17+
18+
[Range(4, 32768, init: 4, logBase: false)]
19+
public int NumberOfTrees { get; set; } = 4;
20+
}
21+
22+
public static async Task RunAsync()
23+
{
24+
// This example shows how to use the Sweepable API to run hyper-parameter optimization over
25+
// LightGBM trainer with a customized search space.
26+
27+
// Create a new context for ML.NET operations. It can be used for
28+
// exception tracking and logging, as a catalog of available operations
29+
// and as the source of randomness. Setting the seed to a fixed number
30+
// in this example to make outputs deterministic.
31+
var seed = 0;
32+
var context = new MLContext(seed);
33+
34+
// Create a list of training data points and convert it to IDataView.
35+
var data = GenerateRandomBinaryClassificationDataPoints(100, seed);
36+
var dataView = context.Data.LoadFromEnumerable(data);
37+
38+
// Split the dataset into train and test sets with 10% of the data used for testing.
39+
var trainTestSplit = context.Data.TrainTestSplit(dataView, testFraction: 0.1);
40+
41+
// Define a customized search space for LightGBM
42+
var lgbmSearchSpace = new SearchSpace<LightGBMOption>();
43+
44+
// Define the sweepable LightGBM estimator.
45+
var lgbm = context.Auto().CreateSweepableEstimator((_context, option) =>
46+
{
47+
return _context.BinaryClassification.Trainers.LightGbm(
48+
"Label",
49+
"Features",
50+
numberOfLeaves: option.NumberOfLeaves,
51+
numberOfIterations: option.NumberOfTrees);
52+
}, lgbmSearchSpace);
53+
54+
// Create sweepable pipeline
55+
var pipeline = new EstimatorChain<ITransformer>().Append(lgbm);
56+
57+
// Create an AutoML experiment
58+
var experiment = context.Auto().CreateExperiment();
59+
60+
// Redirect AutoML log to console
61+
context.Log += (object o, LoggingEventArgs e) =>
62+
{
63+
if (e.Source == nameof(AutoMLExperiment) && e.Kind > Runtime.ChannelMessageKind.Trace)
64+
{
65+
Console.WriteLine(e.RawMessage);
66+
}
67+
};
68+
69+
// Configure the experiment to optimize the "Accuracy" metric on the given dataset.
70+
// This experiment will run hyper-parameter optimization on the given pipeline.
71+
experiment.SetPipeline(pipeline)
72+
.SetDataset(trainTestSplit.TrainSet, fold: 5) // use 5-fold cross validation to evaluate each trial
73+
.SetBinaryClassificationMetric(BinaryClassificationMetric.Accuracy, "Label")
74+
.SetMaxModelToExplore(100); // explore 100 trials
75+
76+
// Start the AutoML experiment.
77+
var result = await experiment.RunAsync();
78+
79+
// Expected output samples during training. The pipeline will be unknown because it's created using
80+
// a customized sweepable estimator, so AutoML does not know the exact type of the estimator.
81+
// Update Running Trial - Id: 0
82+
// Update Completed Trial - Id: 0 - Metric: 0.5105967259285338 - Pipeline: Unknown=>Unknown - Duration: 616 - Peak CPU: 0.00% - Peak Memory in MB: 35.54
83+
// Update Best Trial - Id: 0 - Metric: 0.5105967259285338 - Pipeline: Unknown=>Unknown
84+
85+
// Evaluate the best model on the test dataset.
86+
var bestModel = result.Model;
87+
var eval = bestModel.Transform(trainTestSplit.TestSet);
88+
var metrics = context.BinaryClassification.Evaluate(eval);
89+
90+
PrintMetrics(metrics);
91+
92+
// Expected output:
93+
// Accuracy: 0.67
94+
// AUC: 0.75
95+
// F1 Score: 0.33
96+
// Negative Precision: 0.88
97+
// Negative Recall: 0.70
98+
// Positive Precision: 0.25
99+
// Positive Recall: 0.50
100+
101+
// TEST POSITIVE RATIO: 0.1667(2.0 / (2.0 + 10.0))
102+
// Confusion table
103+
// ||======================
104+
// PREDICTED || positive | negative | Recall
105+
// TRUTH ||======================
106+
// positive || 1 | 1 | 0.5000
107+
// negative || 3 | 7 | 0.7000
108+
// ||======================
109+
// Precision || 0.2500 | 0.8750 |
110+
}
111+
112+
private static IEnumerable<BinaryClassificationDataPoint> GenerateRandomBinaryClassificationDataPoints(int count,
113+
int seed = 0)
114+
115+
{
116+
var random = new Random(seed);
117+
float randomFloat() => (float)random.NextDouble();
118+
for (int i = 0; i < count; i++)
119+
{
120+
var label = randomFloat() > 0.5f;
121+
yield return new BinaryClassificationDataPoint
122+
{
123+
Label = label,
124+
// Create random features that are correlated with the label.
125+
// For data points with false label, the feature values are
126+
// slightly increased by adding a constant.
127+
Features = Enumerable.Repeat(label, 50)
128+
.Select(x => x ? randomFloat() : randomFloat() +
129+
0.1f).ToArray()
130+
131+
};
132+
}
133+
}
134+
135+
// Example with label and 50 feature values. A data set is a collection of
136+
// such examples.
137+
private class BinaryClassificationDataPoint
138+
{
139+
public bool Label { get; set; }
140+
141+
[VectorType(50)]
142+
public float[] Features { get; set; }
143+
}
144+
145+
// Class used to capture predictions.
146+
private class Prediction
147+
{
148+
// Original label.
149+
public bool Label { get; set; }
150+
// Predicted label from the trainer.
151+
public bool PredictedLabel { get; set; }
152+
}
153+
154+
// Pretty-print BinaryClassificationMetrics objects.
155+
private static void PrintMetrics(BinaryClassificationMetrics metrics)
156+
{
157+
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
158+
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
159+
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
160+
Console.WriteLine($"Negative Precision: " +
161+
$"{metrics.NegativePrecision:F2}");
162+
163+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
164+
Console.WriteLine($"Positive Precision: " +
165+
$"{metrics.PositivePrecision:F2}");
166+
167+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
168+
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
169+
}
170+
}
171+
}

src/Microsoft.ML.AutoML/API/AutoCatalog.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,13 @@ public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, b
291291
/// <summary>
292292
/// Create a sweepable estimator with a custom factory and search space.
293293
/// </summary>
294+
/// <example>
295+
/// <format type="text/markdown">
296+
/// <![CDATA[
297+
/// [!code-csharp[AutoMLExperiment](~/../docs/samples/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/SweepableLightGBMBinaryExperiment.cs)]
298+
/// ]]>
299+
/// </format>
300+
/// </example>
294301
public SweepableEstimator CreateSweepableEstimator<T>(Func<MLContext, T, IEstimator<ITransformer>> factory, SearchSpace<T> ss = null)
295302
where T : class, new()
296303
{

src/Microsoft.ML.SearchSpace/Parameter.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ public enum ParameterType
5252
/// <summary>
5353
/// <see cref="Parameter"/> is used to save the sweeping result from a tuner and to restore an ML.NET pipeline from a sweepable pipeline.
5454
/// </summary>
55+
/// <example>
56+
/// <format type="text/markdown">
57+
/// <![CDATA[
58+
/// [!code-csharp[AutoMLExperiment](~/../docs/samples/docs/samples/Microsoft.ML.AutoML.Samples/Sweepable/ParameterExample.cs)]
59+
/// ]]>
60+
/// </format>
61+
/// </example>
5562
[JsonConverter(typeof(ParameterConverter))]
5663
public sealed class Parameter : IDictionary<string, Parameter>, IEquatable<Parameter>, IEqualityComparer<Parameter>
5764
{

0 commit comments

Comments
 (0)