using System;
using System.Globalization;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;

internal static class Program
{
    const string TrainDataPath = "train.csv";
    const string ValidationDataPath = "valid.csv";
    const string TestDataPath = "test.csv";
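
    // Entry point: generate the synthetic CSV files, switch the thread culture, then run AutoML.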
    public static void Main()
    {
        WriteDataset(TrainDataPath, 10, header: true);
        WriteDataset(ValidationDataPath, 30, header: false);
        WriteDataset(TestDataPath, 200, header: false);

        // Run the experiment under a culture that uses ',' as its decimal separator.
        CultureInfo.DefaultThreadCurrentCulture = new CultureInfo("ru-RU");
        RunAutoML();
    }
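
    // Writes a synthetic regression dataset: each row is a random label in [0, 1)
    // followed by three features derived from that label and additional randomness.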
    private static void WriteDataset(string filename, int rows, bool header)
    {
        var rnd = new Random(0);                    // fixed seed keeps the generated data reproducible
        var culture = CultureInfo.InvariantCulture; // always write '.' as the decimal separator
        using var file = new StreamWriter(filename);
        if (header)
            file.WriteLine("Label,Feat1,Feat2,Feat3");
        for (int i = 0; i < rows; i++)
        {
            var label = rnd.NextDouble();
            file.WriteLine($"{label.ToString("F4", culture)},{(label + rnd.NextDouble()).ToString("F4", culture)},{(label * rnd.NextDouble()).ToString("F4", culture)},{(label * rnd.NextDouble()).ToString("F4", culture)}");
        }
    }
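
    // Runs the AutoML regression experiment, prints the top runs, evaluates the best
    // model on the test set, refits on more data, and saves the final model.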
    private static void RunAutoML()
    {
        var mlContext = new MLContext();
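
        // Infer column types and purposes from the training file, then build a single
        // TextLoader from the inferred options so all three files are loaded the same way.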
        ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDataPath, "Label", groupColumns: false);
        ColumnInformation columnInformation = columnInference.ColumnInformation;
        TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        IDataView TrainDataView = textLoader.Load(TrainDataPath);
        IDataView ValidationDataView = textLoader.Load(ValidationDataPath);
        IDataView TestDataView = textLoader.Load(TestDataPath);
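
        // Restrict the experiment to a short time budget and three regression trainers.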
        var progressHandler = new RegressionExperimentProgressHandler();
        var experimentSettings = new RegressionExperimentSettings();
        experimentSettings.MaxExperimentTimeInSeconds = 9;
        experimentSettings.Trainers.Clear();
        experimentSettings.Trainers.Add(RegressionTrainer.LightGbm);
        experimentSettings.Trainers.Add(RegressionTrainer.LbfgsPoissonRegression);
        experimentSettings.Trainers.Add(RegressionTrainer.FastTree);
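
        // Run the experiment: candidate pipelines are trained on the training set and
        // scored against the validation set.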
        var experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
        Console.WriteLine("\nRunning AutoML regression experiment...");
        ExperimentResult<RegressionMetrics> experimentResult =
            experiment.Execute(TrainDataView, ValidationDataView, columnInformation, null, progressHandler);
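
        // Rank completed runs by validation R-squared and print the top five.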
        var topRuns = experimentResult.RunDetails
            .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.RSquared))
            .OrderByDescending(r => r.ValidationMetrics.RSquared)
            .Take(5)
            .ToList();
        Console.WriteLine("\nTop models ranked by R-squared --");
        for (var i = 0; i < topRuns.Count; i++)
        {
            var run = topRuns[i];
            Console.WriteLine("|" + $"{i,-4} {run.TrainerName,-35} {run.ValidationMetrics?.RSquared ?? double.NaN,8:F4} {run.ValidationMetrics?.MeanAbsoluteError ?? double.NaN,13:F2} {run.ValidationMetrics?.MeanSquaredError ?? double.NaN,12:F2} {run.ValidationMetrics?.RootMeanSquaredError ?? double.NaN,8:F2} {run.RuntimeInSeconds,9:F1}".PadRight(112) + "|");
        }

        Console.WriteLine("\n===== Evaluating model's R-squared with test data =====");
        IDataView predictions = experimentResult.BestRun.Model.Transform(TestDataView);
        var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
        Console.WriteLine("|" + $"{"-",-4} {experimentResult.BestRun.TrainerName,-35} {metrics?.RSquared ?? double.NaN,8:F4} {metrics?.MeanAbsoluteError ?? double.NaN,13:F2} {metrics?.MeanSquaredError ?? double.NaN,12:F2} {metrics?.RootMeanSquaredError ?? double.NaN,8:F2} {experimentResult.BestRun.RuntimeInSeconds,9:F1}".PadRight(112) + "|");
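
        // With model selection done, the winning pipeline can be refit on train + validation
        // and re-evaluated on the untouched test set.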
        Console.WriteLine("\n===== Refitting on train+valid and evaluating model's R-squared with test data =====");
        var TrainPlusValidationDataView = textLoader.Load(new MultiFileSource(TrainDataPath, ValidationDataPath));
        var refitModel1 = experimentResult.BestRun.Estimator.Fit(TrainPlusValidationDataView);
        IDataView predictionsRefitOnTrainPlusValidation = refitModel1.Transform(TestDataView);
        var metricsRefitOnTrainPlusValidation = mlContext.Regression.Evaluate(predictionsRefitOnTrainPlusValidation, labelColumnName: "Label", scoreColumnName: "Score");
        Console.WriteLine("|" + $"{"-",-4} {experimentResult.BestRun.TrainerName,-35} {metricsRefitOnTrainPlusValidation?.RSquared ?? double.NaN,8:F4} {metricsRefitOnTrainPlusValidation?.MeanAbsoluteError ?? double.NaN,13:F2} {metricsRefitOnTrainPlusValidation?.MeanSquaredError ?? double.NaN,12:F2} {metricsRefitOnTrainPlusValidation?.RootMeanSquaredError ?? double.NaN,8:F2} {"-",9}".PadRight(112) + "|");
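
        // For the model that ships, refit the winning pipeline on all available data.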
        Console.WriteLine("\n===== Refitting on train+valid+test to get the final model to launch to production =====");
        var TrainPlusValidationPlusTestDataView = textLoader.Load(new MultiFileSource(TrainDataPath, ValidationDataPath, TestDataPath));
        var refitModel2 = experimentResult.BestRun.Estimator.Fit(TrainPlusValidationPlusTestDataView);

        Console.WriteLine("\n=============== Saving the model ===============");
        mlContext.Model.Save(refitModel2, TrainDataView.Schema, "model.zip");
        Console.WriteLine("The model is saved to model.zip");
    }
}
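
// Progress handler for AutoML iterations: prints a column header once, then one metrics row
// (or the exception) for each completed run.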
public class RegressionExperimentProgressHandler : IProgress<RunDetail<RegressionMetrics>>
{
    private int _iterationIndex;

    public void Report(RunDetail<RegressionMetrics> iterationResult)
    {
        if (_iterationIndex++ == 0)
            Console.WriteLine("|" + $"{"",-4} {"Trainer",-35} {"RSquared",8} {"Absolute-loss",13} {"Squared-loss",12} {"RMS-loss",8} {"Duration",9}".PadRight(112) + "|");

        if (iterationResult.Exception != null)
            Console.WriteLine($"Exception during AutoML iteration: {iterationResult.Exception}");
        else
            Console.WriteLine("|" + $"{_iterationIndex,-4} {iterationResult.TrainerName,-35} {iterationResult.ValidationMetrics?.RSquared ?? double.NaN,8:F4} {iterationResult.ValidationMetrics?.MeanAbsoluteError ?? double.NaN,13:F2} {iterationResult.ValidationMetrics?.MeanSquaredError ?? double.NaN,12:F2} {iterationResult.ValidationMetrics?.RootMeanSquaredError ?? double.NaN,8:F2} {iterationResult.RuntimeInSeconds,9:F1}".PadRight(112) + "|");
    }
}