RatePlanNameAnalysis/RatePlanNameAnalysis.ConsoleApp/Program.cs
2024-08-18 21:11:29 +08:00

211 lines
11 KiB
C#

using System;
using System.IO;
using System.Linq;
using Microsoft.ML;
namespace RatePlanNameAnalysis
{
class Program
{
// <SnippetDeclareGlobalVariables>
/// <summary>Directory portion of the first element of the process command line.</summary>
private static string _appPath
{
    get { return Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]); }
}

/// <summary>Training CSV path: three directory levels above <see cref="_appPath"/>, inside "Data".</summary>
private static string _trainDataPath
{
    get { return Path.Combine(_appPath, "..", "..", "..", "Data", "RatePlanNameAnalysis_train.csv"); }
}

/// <summary>Test CSV path: three directory levels above <see cref="_appPath"/>, inside "Data".</summary>
private static string _testDataPath
{
    get { return Path.Combine(_appPath, "..", "..", "..", "Data", "RatePlanNameAnalysis_test.csv"); }
}

/// <summary>Model archive path: three directory levels above <see cref="_appPath"/>, inside "Models".</summary>
private static string _modelPath
{
    get { return Path.Combine(_appPath, "..", "..", "..", "Models", "model.zip"); }
}

// Shared ML.NET state. Main creates the context and loads the training view;
// BuildAndTrainModel sets the trained model and the prediction engine;
// PredictBedType replaces the prediction engine with one built from the saved model.
private static MLContext _mlContext;
private static PredictionEngine<RatePlanNameAnalysisData, BedTypePrediction> _predEngine;
private static ITransformer _trainedModel;
private static IDataView _trainingDataView;
// </SnippetDeclareGlobalVariables>
/// <summary>
/// Program entry point. Creates the ML context, loads the training CSV, then runs
/// ProcessData, BuildAndTrainModel, Evaluate and PredictBedType in that order.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // One MLContext is shared by every step. The fixed seed is intended to make
    // repeated trainings produce repeatable results.
    // <SnippetCreateMLContext>
    _mlContext = new MLContext(seed: 0);
    // </SnippetCreateMLContext>

    // STEP 1: load the training data. The loader schema comes from
    // RatePlanNameAnalysisData; the file is comma-separated with a header row.
    Console.WriteLine("=============== Loading Dataset ===============");
    // <SnippetLoadTrainData>
    var trainLoader = _mlContext.Data.CreateTextLoader<RatePlanNameAnalysisData>(
        separatorChar: ',',
        hasHeader: true,
        allowQuoting: true); // allow quoted fields
    _trainingDataView = trainLoader.Load(_trainDataPath);
    // </SnippetLoadTrainData>
    Console.WriteLine("=============== Finished Loading Dataset ===============");

    // <SnippetCallProcessData>
    IEstimator<ITransformer> dataPipeline = ProcessData();
    // </SnippetCallProcessData>

    // The returned training pipeline is not needed here; the fitted model is
    // kept in _trainedModel by BuildAndTrainModel itself.
    // <SnippetCallBuildAndTrainModel>
    BuildAndTrainModel(_trainingDataView, dataPipeline);
    // </SnippetCallBuildAndTrainModel>

    // Evaluate also saves the model file that PredictBedType loads afterwards.
    // <SnippetCallEvaluate>
    Evaluate(_trainingDataView.Schema);
    // </SnippetCallEvaluate>

    // <SnippetCallPredictBedType>
    PredictBedType();
    // </SnippetCallPredictBedType>
}
/// <summary>
/// Builds the data-preparation pipeline: maps "BedTypeName" to a key column named
/// "Label", featurizes the "RatePlanName" text, concatenates the result into
/// "Features", and appends a cache checkpoint.
/// </summary>
/// <returns>The untrained data-preparation estimator chain.</returns>
public static IEstimator<ITransformer> ProcessData()
{
    Console.WriteLine("=============== Processing Data ===============");

    // STEP 2: describe each transformation separately, then chain them.
    var transforms = _mlContext.Transforms;

    // <SnippetMapValueToKey>
    var labelToKey = transforms.Conversion.MapValueToKey(inputColumnName: "BedTypeName", outputColumnName: "Label");
    // </SnippetMapValueToKey>

    // <SnippetFeaturizeText>
    var planNameFeatures = transforms.Text.FeaturizeText(inputColumnName: "RatePlanName", outputColumnName: "RatePlanNameFeaturized");
    // </SnippetFeaturizeText>

    // <SnippetConcatenate>
    var assembleFeatures = transforms.Concatenate("Features", "RatePlanNameFeaturized");
    // </SnippetConcatenate>

    // The cache checkpoint lets estimators that iterate over the data several
    // times read from a cache instead of re-reading the file.
    // <SnippetAppendCache>
    var dataPipeline = labelToKey
        .Append(planNameFeatures)
        .Append(assembleFeatures)
        .AppendCacheCheckpoint(_mlContext);
    // </SnippetAppendCache>

    Console.WriteLine("=============== Finished Processing Data ===============");

    // <SnippetReturnPipeline>
    return dataPipeline;
    // </SnippetReturnPipeline>
}
/// <summary>
/// Appends the SDCA maximum-entropy multiclass trainer and a key-to-value mapping of
/// "PredictedLabel" to <paramref name="pipeline"/>, fits the result on
/// <paramref name="trainingDataView"/>, stores the fitted model in _trainedModel,
/// and prints one sample prediction made with a freshly created prediction engine.
/// </summary>
/// <param name="trainingDataView">Data the pipeline is fitted on.</param>
/// <param name="pipeline">Data-preparation estimator chain to extend.</param>
/// <returns>The complete (unfitted) training pipeline.</returns>
public static IEstimator<ITransformer> BuildAndTrainModel(IDataView trainingDataView, IEstimator<ITransformer> pipeline)
{
    // STEP 3: choose the trainer and convert the predicted key back to its
    // original readable value.
    // <SnippetAddTrainer>
    var sdcaTrainer = _mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features");
    var keyToLabel = _mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");
    var trainingPipeline = pipeline.Append(sdcaTrainer).Append(keyToLabel);
    // </SnippetAddTrainer>

    // STEP 4: fit the pipeline to the training data.
    Console.WriteLine("=============== Training the model ===============");
    // <SnippetTrainModel>
    _trainedModel = trainingPipeline.Fit(trainingDataView);
    // </SnippetTrainModel>
    string finishedAt = DateTime.Now.ToString();
    Console.WriteLine($"=============== Finished Training the model Ending time: {finishedAt} ===============");

    // Optional smoke test: one prediction with the just-trained model, before it is saved.
    Console.WriteLine("=============== Single Prediction just-trained-model ===============");
    // <SnippetCreatePredictionEngine1>
    _predEngine = _mlContext.Model.CreatePredictionEngine<RatePlanNameAnalysisData, BedTypePrediction>(_trainedModel);
    // </SnippetCreatePredictionEngine1>

    // <SnippetCreateTestBedType1>
    var sampleInput = new RatePlanNameAnalysisData();
    sampleInput.RatePlanName = "double suite (double bed) (double king size bed, terrace)";
    // </SnippetCreateTestBedType1>

    // <SnippetPredict>
    var sampleResult = _predEngine.Predict(sampleInput);
    // </SnippetPredict>

    // <SnippetOutputPrediction>
    Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {sampleResult.BedTypeName} ===============");
    // </SnippetOutputPrediction>

    // <SnippetReturnModel>
    return trainingPipeline;
    // </SnippetReturnModel>
}
/// <summary>
/// Loads the test CSV, scores it with the model held in _trainedModel, prints the
/// multiclass metrics, and then saves the model through SaveModelAsFile.
/// </summary>
/// <param name="trainingDataViewSchema">
/// Schema of the training data; passed on unchanged to SaveModelAsFile.
/// </param>
public static void Evaluate(DataViewSchema trainingDataViewSchema)
{
    // STEP 5: evaluate the model to obtain its accuracy metrics.
    Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Starting time: {DateTime.Now.ToString()} ===============");

    // Load the test dataset into an IDataView.
    // <SnippetLoadTestDataset>
    // Create the data loader (comma-separated, header row, quoted fields allowed).
    var loader = _mlContext.Data.CreateTextLoader<RatePlanNameAnalysisData>(
        separatorChar: ',',
        hasHeader: true,
        allowQuoting: true);
    // Load the data.
    var testDataView = loader.Load(_testDataPath);
    // </SnippetLoadTestDataset>

    // Score the test data with the trained model and compute the metrics.
    // <SnippetEvaluate>
    var testMetrics = _mlContext.MulticlassClassification.Evaluate(_trainedModel.Transform(testDataView));
    // </SnippetEvaluate>
    Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Ending time: {DateTime.Now.ToString()} ===============");

    // <SnippetDisplayMetrics>
    // All four metrics use "0.###": the "#" placeholder before the decimal point
    // would drop the leading zero (".35") and print nothing at all for 0.
    Console.WriteLine($"*************************************************************************************************************");
    Console.WriteLine($"* Metrics for Multi-class Classification model - Test Data ");
    Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
    Console.WriteLine($"* MicroAccuracy: {testMetrics.MicroAccuracy:0.###}");
    Console.WriteLine($"* MacroAccuracy: {testMetrics.MacroAccuracy:0.###}");
    Console.WriteLine($"* LogLoss: {testMetrics.LogLoss:0.###}");
    Console.WriteLine($"* LogLossReduction: {testMetrics.LogLossReduction:0.###}");
    Console.WriteLine($"*************************************************************************************************************");
    // </SnippetDisplayMetrics>

    // Save the trained model to a .ZIP file.
    // <SnippetCallSaveModel>
    SaveModelAsFile(_mlContext, trainingDataViewSchema, _trainedModel);
    // </SnippetCallSaveModel>
}
/// <summary>
/// Loads the model from _modelPath, builds a prediction engine from it (stored in
/// _predEngine), and prints the prediction for one hard-coded rate plan name.
/// </summary>
public static void PredictBedType()
{
    // The input schema returned by Load is not used here, so it is discarded.
    // <SnippetLoadModel>
    ITransformer restoredModel = _mlContext.Model.Load(_modelPath, out DataViewSchema _);
    // </SnippetLoadModel>

    // <SnippetAddTestBedType>
    var sampleInput = new RatePlanNameAnalysisData();
    sampleInput.RatePlanName = "double suite (double bed) (double king size bed, terrace)";
    // </SnippetAddTestBedType>

    // Predict the label for the single hard-coded sample.
    // <SnippetCreatePredictionEngine>
    _predEngine = _mlContext.Model.CreatePredictionEngine<RatePlanNameAnalysisData, BedTypePrediction>(restoredModel);
    // </SnippetCreatePredictionEngine>

    // <SnippetPredictBedType>
    var sampleResult = _predEngine.Predict(sampleInput);
    // </SnippetPredictBedType>

    // <SnippetDisplayResults>
    Console.WriteLine($"=============== Single Prediction - Result: {sampleResult.BedTypeName} ===============");
    // </SnippetDisplayResults>
}
/// <summary>
/// Saves <paramref name="model"/> together with its input schema to _modelPath,
/// creating the target directory first when it does not exist yet.
/// </summary>
/// <param name="mlContext">Context whose model catalog performs the save.</param>
/// <param name="trainingDataViewSchema">Input schema stored alongside the model.</param>
/// <param name="model">Trained transformer to persist.</param>
private static void SaveModelAsFile(MLContext mlContext, DataViewSchema trainingDataViewSchema, ITransformer model)
{
    string modelPath = _modelPath;

    // Saving to a file path does not create missing parent directories, so a
    // fresh checkout without the "Models" folder would otherwise fail here.
    string modelDirectory = Path.GetDirectoryName(modelPath);
    if (!string.IsNullOrEmpty(modelDirectory))
    {
        Directory.CreateDirectory(modelDirectory);
    }

    // <SnippetSaveModel>
    mlContext.Model.Save(model, trainingDataViewSchema, modelPath);
    // </SnippetSaveModel>
    Console.WriteLine("The model is saved to {0}", modelPath);
}
}
}