2

I am using ML.NET, Microsoft's machine learning library. I want to print the processed (featurized) input vector that is used in the training process. Can I print it?

    // Directory part of the first command-line argument (the path the program was started with).
    private static string _appPath
    {
        get { return Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]); }
    }

    // TRAIN_DATA_FILEPATH: path of the CSV dataset used to train the model.
    // It is resolved relative to _appPath: three directories up, then "Data/A.csv".
    // Alternative absolute path kept from the original: @"C:\Users\taqwa\Desktop\A.csv"
    private static string TRAIN_DATA_FILEPATH
    {
        get { return Path.Combine(_appPath, "..", "..", "..", "Data", "A.csv"); }
    }

    // MODEL_FILEPATH: relative path of the model zip file, intended for the
    // (currently disabled) SaveModel step in CreateModel.
    private static string MODEL_FILEPATH = @"../../../../MyMLAppML.Model/MLModel.zip";

    // Single MLContext shared by all model-creation steps.
    // The fixed seed makes results repeatable across multiple trainings.
    private static MLContext mlContext = new MLContext(seed: 1);

    /// <summary>
    /// Loads the training dataset, builds the training pipeline and trains the model.
    /// Model evaluation and saving are currently disabled (see the commented-out calls).
    /// </summary>
    public static void CreateModel()
    {
        // Step 1 - load the CSV file into an IDataView.
        // ModelInput is the input dataset class; per the original note it has the
        // string fields Cases, Algorithm and InjuryOrIllness.
        var trainingData = mlContext.Data.LoadFromTextFile<ModelInput>(
            path: TRAIN_DATA_FILEPATH,
            hasHeader: true,     // the first line of the file is a header row
            separatorChar: ',',
            allowQuoting: true,  // values may be quoted, so they can contain the separator character
            allowSparse: false); // numerical vectors in sparse format are not expected

        // Step 2 - build the training pipeline (data transformations + trainer).
        var pipeline = BuildTrainingPipeline(mlContext);

        // Step 3 (disabled) - evaluate the quality of the model:
        //    Evaluate(mlContext, trainingData, pipeline);

        // Step 4 - train the model.
        var trainedModel = TrainModel(mlContext, trainingData, pipeline);

        // Step 5 (disabled) - save the model:
        //    SaveModel(mlContext, trainedModel, MODEL_FILEPATH, trainingData.Schema);
    }

    /// <summary>
    /// Assembles the complete training pipeline: the data-processing transformations
    /// followed by the multiclass trainer and the conversion of the predicted key back to its value.
    /// </summary>
    /// <param name="mlContext">The ML.NET context whose catalogs are used to create the estimators.</param>
    /// <returns>The estimator chain to be fitted on the training data.</returns>
    public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
    {
        var transforms = mlContext.Transforms;

        // Map the "Algorithm" label column to a numeric key type (the format accepted by
        // classification algorithms); the result is written back to "Algorithm".
        var labelToKey = transforms.Conversion.MapValueToKey("Algorithm", "Algorithm");

        // One-hot encode the "injuryOrIllness" column in place.
        var injuryOneHot = transforms.Categorical.OneHotEncoding(
            new[] { new InputOutputColumnPair("injuryOrIllness", "injuryOrIllness") });

        // Turn the text column "Cases" into a numeric vector column named "Cases_tf".
        var casesFeaturizer = transforms.Text.FeaturizeText("Cases_tf", "Cases");

        // Merge the encoded columns into the single "Features" vector and normalize it.
        var concatenateFeatures = transforms.Concatenate("Features", new[] { "injuryOrIllness", "Cases_tf" });
        var normalizeFeatures = transforms.NormalizeMinMax("Features", "Features");

        // Chain the transformations. The cache checkpoint caches the data view, which may
        // improve performance when the trainer iterates over the data multiple times.
        var dataProcessPipeline = labelToKey
            .Append(injuryOneHot)
            .Append(casesFeaturizer)
            .Append(concatenateFeatures)
            .Append(normalizeFeatures)
            .AppendCacheCheckpoint(mlContext);

        // Training algorithm: an AveragedPerceptron binary classifier wrapped in OneVersusAll,
        // which predicts a multiclass target using a one-versus-all strategy.
        var binaryLearner = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
            labelColumnName: "Algorithm",
            numberOfIterations: 10,
            featureColumnName: "Features");
        var oneVersusAll = mlContext.MulticlassClassification.Trainers.OneVersusAll(
            binaryLearner,
            labelColumnName: "Algorithm");

        // Convert the predicted key back to its original value.
        var trainer = oneVersusAll.Append(
            mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

        // Alternative trainer kept from the original (disabled):
        //var trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(labelColumnName: "Algorithm", featureColumnName: "Features")
        //              .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

        return dataProcessPipeline.Append(trainer);
    }


    /// <summary>
    /// Trains the model by fitting <paramref name="trainingPipeline"/> to
    /// <paramref name="trainingDataView"/>, then prints the input schema and, when the
    /// transformed data has a "Features" column, the feature vector of the first few rows.
    /// </summary>
    /// <param name="mlContext">The ML.NET context (not used directly by this method).</param>
    /// <param name="trainingDataView">The data the pipeline is fitted on.</param>
    /// <param name="trainingPipeline">The estimator chain to train.</param>
    /// <returns>The trained model.</returns>
    public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
    {
        Console.WriteLine("=============== Training  model ===============");

        // Fit() trains the model by transforming the dataset and applying the trainer,
        // and returns the trained model.
        ITransformer model = trainingPipeline.Fit(trainingDataView);

        // Interpolating the schema object itself does not print its columns,
        // so list each column's name and type explicitly.
        Console.WriteLine("Input schema:");
        foreach (var column in trainingDataView.Schema)
        {
            Console.WriteLine($"  {column.Name}: {column.Type}");
        }

        // Run the training data through the fitted model so the processed columns
        // (including the "Features" vector fed to the trainer) become available.
        const int previewRowCount = 5;
        IDataView transformedData = model.Transform(trainingDataView);

        // Only print features when the pipeline actually produced that column.
        // NOTE(review): assumes "Features" is a vector of Single, as produced by
        // BuildTrainingPipeline - confirm if a different pipeline is passed in.
        if (transformedData.Schema.GetColumnOrNull("Features") != null)
        {
            Console.WriteLine($"Featurized input vectors (first {previewRowCount} rows):");

            // Fully qualified so no additional using directive is required.
            var featureRows = Microsoft.ML.Data.ColumnCursorExtensions.GetColumn<float[]>(transformedData, "Features");

            int rowIndex = 0;
            foreach (float[] features in featureRows)
            {
                if (rowIndex >= previewRowCount)
                {
                    break;
                }

                string values = string.Join(", ", features);
                Console.WriteLine($"  Row {rowIndex} (length {features.Length}): {values}");
                rowIndex++;
            }
        }

        Console.WriteLine("=============== End of training process ===============");
        return model;
    }

This is part of my code. I tried to print the processed (featurized) input vector used in the training process.

So I tried to print trainingDataView.Schema with Console.WriteLine($"{trainingDataView.Schema}");, but its content only shows up as non-public members.

This picture shows what I want:

2 Answers 2

1

Have you tried using Preview() method? Preview can be used on IEstimator and also on ITransformer. You can use GetColumn<> to get the value of a specific column from IDataView. Also, check this documentation page https://learn.microsoft.com/cs-cz/dotnet/machine-learning/how-to-guides/inspect-intermediate-data-ml-net

Sign up to request clarification or add additional context in comments.

Comments

0

You can check the schema of the data or iterate over each row.

In the first case, you can use:

// Preview() returns a debugging snapshot of the data (its schema plus the first rows), not just the schema.
var preview = data.Preview();

Otherwise, you can iterate with:

 // Read the rows of `data` back as ModelInput objects.
 // reuseRowObject: true lets the same instance be refilled for every row.
 var rows = mlContext.Data.CreateEnumerable<ModelInput>(data, reuseRowObject: true);

 foreach (var row in rows)
 {
     // Print every public property of the current row as name=value.
     foreach (var property in row.GetType().GetProperties())
     {
         Console.WriteLine($"{property.Name}={property.GetValue(row)}");
     }
 }

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.