Update to ML 1.0

andrewdalpino · andrewdalpino · commit 7a4598172fed · 2021-05-08T13:43:23.000-05:00
diff --git a/.gitignore b/.gitignore
@@ -2,7 +2,7 @@
 composer.lock
 progress.csv
 report.json
+*.rbx
+*.old
 .vscode
-.vs
-*.model
-*.old
+.vs
diff --git a/README.md b/README.md
@@ -84,7 +84,7 @@ use Rubix\ML\Persisters\Filesystem;
 $estimator = new PersistentModel(
     new Pipeline([
         new TextNormalizer(),
-        new WordCountVectorizer(10000, 2, 10000, new NGram(1, 2)),
+        new WordCountVectorizer(10000, 0.00008, 0.4, new NGram(1, 2)),
         new TfIdfTransformer(),
         new ZScaleStandardizer(),
     ], new MultilayerPerceptron([
@@ -100,7 +100,7 @@ $estimator = new PersistentModel(
         new Dense(50),
         new PReLU(),
     ], 256, new AdaMax(0.0001))),
-    new Filesystem('sentiment.model', true)
+    new Filesystem('sentiment.rbx', true)
 );
 ```
 
@@ -116,22 +116,14 @@ $estimator->train($dataset);
 ```
 
 ### Validation Score and Loss
-During training, the learner will record the validation score and the training loss at each iteration or *epoch*. The validation score is calculated using the default [F Beta](https://docs.rubixml.com/latest/cross-validation/metrics/f-beta.html) metric on a hold out portion of the training set called a *validation* set. Contrariwise, the training loss is the value of the cost function (in this case the [Cross Entropy](https://docs.rubixml.com/latest/neural-network/cost-functions/cross-entropy.html) loss) calculated over the samples left in the training set. We can visualize the training progress by plotting these metrics. To output the scores and losses you can call the additional `scores()` and `steps()` methods respectively.
+During training, the learner will record the validation score and the training loss at each iteration or *epoch*. The validation score is calculated using the default [F Beta](https://docs.rubixml.com/latest/cross-validation/metrics/f-beta.html) metric on a hold out portion of the training set called a *validation* set. Contrariwise, the training loss is the value of the cost function (in this case the [Cross Entropy](https://docs.rubixml.com/latest/neural-network/cost-functions/cross-entropy.html) loss) calculated over the samples left in the training set. We can visualize the training progress by plotting these metrics. To output the scores and losses you can call the additional `steps()` method and pass the resulting iterator to a Writable extractor such as [CSV](https://docs.rubixml.com/latest/extractors/csv.html).
 
 ```php
-$scores = $estimator->scores();
+use Rubix\ML\Extractors\CSV;
 
-$losses = $estimator->steps();
-```
-Next, we'll use an [Unlabeled](https://docs.rubixml.com/latest/datasets/unlabeled.html) dataset object to temporarily store and convert the scores and losses into CSV format so that we can import the data into our favorite plotting application such as [Plotly](https://plotly.com) or [Excel](https://www.microsoft.com/en-us/microsoft-365/excel). The global `array_transpose()` function takes a 2-dimensional array and changes the rows to columns and vice versa. It is necessary to call this function in order to get the samples into the correct *shape* for the dataset object.
-
-```php
-use Rubix\ML\Datasets\Unlabeled;
-use function Rubix\ML\array_transpose;
-
-$table = array_transpose([$scores, $losses]);
+$extractor = new CSV('progress.csv', true);
 
-Unlabeled::build($table)->toCSV()->write('progress.csv');
+$extractor->export($estimator->steps());
 ```
 
 Here is an example of what the validation score and training loss look like when they are plotted. The validation score should be getting better with each epoch as the loss decreases. You can generate your own plots by importing the `progress.csv` file into your plotting application.
@@ -184,7 +176,7 @@ Next, we'll use the Persistent Model wrapper to load the network we trained earl
 use Rubix\ML\PersistentModel;
 use Rubix\ML\Persisters\Filesystem;
 
-$estimator = PersistentModel::load(new Filesystem('sentiment.model'));
+$estimator = PersistentModel::load(new Filesystem('sentiment.rbx'));
 ```
 
 Now we can use the estimator to make predictions on the testing set. The `predict()` method on the estimator takes a dataset as input and returns an array of predictions.
@@ -214,10 +206,10 @@ $results = $report->generate($predictions, $dataset->labels());
 echo $results;
 ```
 
-We'll also save a copy of the report to a JSON file.
+We'll also save a copy of the report to a JSON file using the Filesystem persister.
 
 ```php
-$results->toJSON()->write('report.json');
+$results->toJSON()->saveTo(new Filesystem('report.json'));
 ```
 
 Now we can execute the validation script from the command line.
@@ -327,7 +319,7 @@ First, load the model from storage using the static `load()` method on the Persi
 use Rubix\ML\PersistentModel;
 use Rubix\ML\Persisters\Filesystem;
 
-$estimator = PersistentModel::load(new Filesystem('sentiment.model'));
+$estimator = PersistentModel::load(new Filesystem('sentiment.rbx'));
 ```
 
 Next, we'll use the built-in PHP function `readline()` to prompt the user to enter some text that we'll store in a variable.
@@ -366,4 +358,4 @@ See DATASET_README. For comments or questions regarding the dataset please conta
 >- Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng, and Christopher Potts. (2011). Learning Word Vectors for Sentiment Analysis. The 49th Annual Meeting of the Association for Computational Linguistics (ACL 2011).
 
 ## License
-The code is licensed [MIT](LICENSE) and the tutorial is licensed [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/).
+The code is licensed [MIT](LICENSE) and the tutorial is licensed [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/).
diff --git a/composer.json b/composer.json
@@ -20,8 +20,8 @@
         }
     ],
     "require": {
-        "php": ">=7.2",
-        "rubix/ml": "^0.3.0"
+        "php": ">=7.4",
+        "rubix/ml": "^1.0"
     },
     "scripts": {
         "predict": "@php predict.php",
diff --git a/predict.php b/predict.php
@@ -7,7 +7,7 @@
 
 ini_set('memory_limit', '-1');
 
-$estimator = PersistentModel::load(new Filesystem('sentiment.model'));
+$estimator = PersistentModel::load(new Filesystem('sentiment.rbx'));
 
 while (empty($text)) $text = readline("Enter some text to analyze:\n");
 
diff --git a/train.php b/train.php
@@ -2,13 +2,13 @@
 
 include __DIR__ . '/vendor/autoload.php';
 
-use Rubix\ML\Other\Loggers\Screen;
+use Rubix\ML\Loggers\Screen;
 use Rubix\ML\Datasets\Labeled;
 use Rubix\ML\PersistentModel;
 use Rubix\ML\Pipeline;
 use Rubix\ML\Transformers\TextNormalizer;
 use Rubix\ML\Transformers\WordCountVectorizer;
-use Rubix\ML\Other\Tokenizers\NGram;
+use Rubix\ML\Tokenizers\NGram;
 use Rubix\ML\Transformers\TfIdfTransformer;
 use Rubix\ML\Transformers\ZScaleStandardizer;
 use Rubix\ML\Classifiers\MultilayerPerceptron;
@@ -19,9 +19,7 @@
 use Rubix\ML\NeuralNet\ActivationFunctions\LeakyReLU;
 use Rubix\ML\NeuralNet\Optimizers\AdaMax;
 use Rubix\ML\Persisters\Filesystem;
-use Rubix\ML\Datasets\Unlabeled;
-
-use function Rubix\ML\array_transpose;
+use Rubix\ML\Extractors\CSV;
 
 ini_set('memory_limit', '-1');
 
@@ -43,7 +41,7 @@
 $estimator = new PersistentModel(
     new Pipeline([
         new TextNormalizer(),
-        new WordCountVectorizer(10000, 2, 10000, new NGram(1, 2)),
+        new WordCountVectorizer(10000, 0.00008, 0.4, new NGram(1, 2)),
         new TfIdfTransformer(),
         new ZScaleStandardizer(),
     ], new MultilayerPerceptron([
@@ -59,19 +57,16 @@
         new Dense(50),
         new PReLU(),
     ], 256, new AdaMax(0.0001))),
-    new Filesystem('sentiment.model', true)
+    new Filesystem('sentiment.rbx', true)
 );
 
 $estimator->setLogger($logger);
 
 $estimator->train($dataset);
 
-$scores = $estimator->scores();
-$losses = $estimator->steps();
+$extractor = new CSV('progress.csv', true);
 
-Unlabeled::build(array_transpose([$scores, $losses]))
-    ->toCSV(['scores', 'losses'])
-    ->write('progress.csv');
+$extractor->export($estimator->steps());
 
 $logger->info('Progress saved to progress.csv');
 
diff --git a/validate.php b/validate.php
@@ -2,7 +2,7 @@
 
 include __DIR__ . '/vendor/autoload.php';
 
-use Rubix\ML\Other\Loggers\Screen;
+use Rubix\ML\Loggers\Screen;
 use Rubix\ML\Datasets\Labeled;
 use Rubix\ML\PersistentModel;
 use Rubix\ML\Persisters\Filesystem;
@@ -27,7 +27,7 @@
 
 $dataset = Labeled::build($samples, $labels)->randomize()->take(10000);
 
-$estimator = PersistentModel::load(new Filesystem('sentiment.model'));
+$estimator = PersistentModel::load(new Filesystem('sentiment.rbx'));
 
 $logger->info('Making predictions');
 
@@ -42,6 +42,6 @@
 
 echo $results;
 
-$results->toJSON()->write('report.json');
+$results->toJSON()->saveTo(new Filesystem('report.json'));
 
-$logger->info('Report saved to report.json');
+$logger->info('Report saved to report.json');