using System;
using System.IO;
using System.Linq;
using Microsoft.ML;

namespace RatePlanNameAnalysis
{
    /// <summary>
    /// Console entry point that trains, evaluates, saves and reloads a multiclass
    /// classification model which predicts a bed type name from a rate plan name.
    /// </summary>
    class Program
    {
        // All paths are resolved relative to the directory of the running executable.
        private static string _appPath => Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]);
        private static string _trainDataPath => Path.Combine(_appPath, "..", "..", "..", "Data", "RatePlanNameAnalysis_train.csv");
        private static string _testDataPath => Path.Combine(_appPath, "..", "..", "..", "Data", "RatePlanNameAnalysis_test.csv");
        private static string _modelPath => Path.Combine(_appPath, "..", "..", "..", "Models", "model.zip");

        private static MLContext _mlContext;

        // NOTE(review): the generic type arguments were missing from this file.
        // RatePlanNameAnalysisData is the input type used throughout; the output type
        // name BedTypePrediction is an assumption (it must expose a BedTypeName member,
        // presumably mapped to the "PredictedLabel" column) - confirm against the
        // project's data model classes.
        private static PredictionEngine<RatePlanNameAnalysisData, BedTypePrediction> _predEngine;
        private static ITransformer _trainedModel;
        static IDataView _trainingDataView;

        /// <summary>
        /// Runs the whole workflow: load the training data, build the processing
        /// pipeline, train, evaluate (which also saves the model) and finally run a
        /// single prediction with the model loaded back from disk.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create MLContext to be shared across the model creation workflow objects.
            // A fixed random seed gives repeatable/deterministic results across multiple trainings.
            _mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration.
            // The text loader takes its schema from the RatePlanNameAnalysisData model type.
            Console.WriteLine($"=============== Loading Dataset ===============");

            // Quoting is enabled so that quoted fields may contain the ',' separator.
            var loader = _mlContext.Data.CreateTextLoader<RatePlanNameAnalysisData>(
                separatorChar: ',',
                hasHeader: true,
                allowQuoting: true);
            _trainingDataView = loader.Load(_trainDataPath);

            Console.WriteLine($"=============== Finished Loading Dataset ===============");

            var pipeline = ProcessData();

            BuildAndTrainModel(_trainingDataView, pipeline);

            Evaluate(_trainingDataView.Schema);

            PredictBedType();
        }

        /// <summary>
        /// Builds the data-processing part of the pipeline: maps the "BedTypeName"
        /// column to a key-typed "Label", featurizes the "RatePlanName" text and
        /// exposes the result as the "Features" column.
        /// </summary>
        /// <returns>The (not yet fitted) data-processing estimator chain.</returns>
        public static IEstimator<ITransformer> ProcessData()
        {
            Console.WriteLine($"=============== Processing Data ===============");

            // STEP 2: Common data process configuration with pipeline data transformations.
            var pipeline = _mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: "BedTypeName", outputColumnName: "Label")
                .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: "RatePlanName", outputColumnName: "RatePlanNameFeaturized"))
                .Append(_mlContext.Transforms.Concatenate("Features", "RatePlanNameFeaturized"))
                // Cache the data view so estimators that iterate over the data multiple
                // times read from the cache instead of always reading from file.
                .AppendCacheCheckpoint(_mlContext);

            Console.WriteLine($"=============== Finished Processing Data ===============");

            return pipeline;
        }

        /// <summary>
        /// Appends the trainer to <paramref name="pipeline"/>, fits it on
        /// <paramref name="trainingDataView"/>, stores the fitted model in
        /// <c>_trainedModel</c> and prints one sample prediction.
        /// </summary>
        /// <param name="trainingDataView">Data the model is fitted on.</param>
        /// <param name="pipeline">Data-processing estimator chain to extend.</param>
        /// <returns>The complete training pipeline (processing + trainer + label mapping).</returns>
        public static IEstimator<ITransformer> BuildAndTrainModel(IDataView trainingDataView, IEstimator<ITransformer> pipeline)
        {
            // STEP 3: Create the training algorithm/trainer.
            // Use the multi-class SDCA algorithm to predict the label using features,
            // then map the predicted key back to its original readable value.
            var trainingPipeline = pipeline
                .Append(_mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features"))
                .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // STEP 4: Train the model by fitting it to the data set.
            Console.WriteLine($"=============== Training the model ===============");

            _trainedModel = trainingPipeline.Fit(trainingDataView);

            Console.WriteLine($"=============== Finished Training the model Ending time: {DateTime.Now.ToString()} ===============");

            // (OPTIONAL) Try a single prediction with the just-trained model, before saving it.
            Console.WriteLine($"=============== Single Prediction just-trained-model ===============");

            _predEngine = _mlContext.Model.CreatePredictionEngine<RatePlanNameAnalysisData, BedTypePrediction>(_trainedModel);

            RatePlanNameAnalysisData BedType = new RatePlanNameAnalysisData()
            {
                RatePlanName = "double suite (double bed) (double king size bed, terrace)"
            };

            var prediction = _predEngine.Predict(BedType);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.BedTypeName} ===============");

            return trainingPipeline;
        }

        /// <summary>
        /// Evaluates <c>_trainedModel</c> on the test data file, prints the
        /// multiclass metrics and then saves the model to <c>_modelPath</c>.
        /// </summary>
        /// <param name="trainingDataViewSchema">Input schema stored together with the saved model.</param>
        public static void Evaluate(DataViewSchema trainingDataViewSchema)
        {
            // STEP 5: Evaluate the model in order to get the model's accuracy metrics.
            Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Starting time: {DateTime.Now.ToString()} ===============");

            // Create the data loader (quoting enabled) and load the test data set.
            var loader = _mlContext.Data.CreateTextLoader<RatePlanNameAnalysisData>(
                separatorChar: ',',
                hasHeader: true,
                allowQuoting: true);
            var testDataView = loader.Load(_testDataPath);

            // Evaluate the model on the test data set and calculate its metrics.
            var testMetrics = _mlContext.MulticlassClassification.Evaluate(_trainedModel.Transform(testDataView));

            Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Ending time: {DateTime.Now.ToString()} ===============");

            Console.WriteLine($"*************************************************************************************************************");
            Console.WriteLine($"* Metrics for Multi-class Classification model - Test Data ");
            Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
            Console.WriteLine($"* MicroAccuracy: {testMetrics.MicroAccuracy:0.###}");
            Console.WriteLine($"* MacroAccuracy: {testMetrics.MacroAccuracy:0.###}");
            Console.WriteLine($"* LogLoss: {testMetrics.LogLoss:#.###}");
            Console.WriteLine($"* LogLossReduction: {testMetrics.LogLossReduction:#.###}");
            Console.WriteLine($"*************************************************************************************************************");

            // Save the new model to a .ZIP file.
            SaveModelAsFile(_mlContext, trainingDataViewSchema, _trainedModel);
        }

        /// <summary>
        /// Loads the model from <c>_modelPath</c> and prints the prediction for one
        /// hard-coded rate plan name.
        /// </summary>
        public static void PredictBedType()
        {
            ITransformer loadedModel = _mlContext.Model.Load(_modelPath, out var modelInputSchema);

            RatePlanNameAnalysisData singleBedType = new RatePlanNameAnalysisData()
            {
                RatePlanName = "double suite (double bed) (double king size bed, terrace)"
            };

            // Predict the label for the single hard-coded sample.
            _predEngine = _mlContext.Model.CreatePredictionEngine<RatePlanNameAnalysisData, BedTypePrediction>(loadedModel);

            var prediction = _predEngine.Predict(singleBedType);

            Console.WriteLine($"=============== Single Prediction - Result: {prediction.BedTypeName} ===============");
        }

        /// <summary>
        /// Saves <paramref name="model"/> together with its input schema to <c>_modelPath</c>.
        /// </summary>
        /// <param name="mlContext">Context whose model catalog performs the save.</param>
        /// <param name="trainingDataViewSchema">Input schema persisted alongside the model.</param>
        /// <param name="model">The fitted transformer to persist.</param>
        private static void SaveModelAsFile(MLContext mlContext, DataViewSchema trainingDataViewSchema, ITransformer model)
        {
            // Make sure the target folder exists so saving does not fail on a fresh
            // checkout where the "Models" directory has not been created yet.
            var modelDirectory = Path.GetDirectoryName(_modelPath);
            if (!string.IsNullOrEmpty(modelDirectory))
            {
                Directory.CreateDirectory(modelDirectory);
            }

            mlContext.Model.Save(model, trainingDataViewSchema, _modelPath);

            Console.WriteLine("The model is saved to {0}", _modelPath);
        }
    }
}