Skip to content

Commit ff806de

Browse files
committed
Rudimentary documentation
1 parent b960492 commit ff806de

17 files changed

+97
-21
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "openml"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
authors = ["Martin Billinger <flkazemakase@gmail.com>"]
55

66
description = "A rust interface to [OpenML](http://openml.org/)."

src/baseline/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//! Implementation of simple baseline models, used for testing and demonstration.
2+
13
mod naive_bayes_classifier;
24

35
pub use self::naive_bayes_classifier::NaiveBayesClassifier;

src/baseline/naive_bayes_classifier.rs

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,36 @@
1+
//! Implementation of a Gaussian Naive Bayes Classifier
2+
13
use std::cmp::Ordering;
24
use std::collections::HashMap;
35
use std::f64;
46
use std::fmt;
57
use std::hash::Hash;
68
use std::iter::FromIterator;
79

10+
/// A Gaussian Naive Bayes Classifier
11+
///
12+
/// The classifier is trained by consuming an iterator over the training data:
13+
/// ```ignore
14+
/// let nbc: NaiveBayesClassifier<_> = data
15+
/// .iter()
16+
/// .collect();
17+
/// ```
818
#[derive(Debug)]
919
pub struct NaiveBayesClassifier<C>
1020
where C: Eq + Hash
1121
{
1222
class_distributions: HashMap<C, FeatureDistribution>,
1323
}
1424

25+
/// Distribution of each feature column
1526
#[derive(Debug, Clone)]
1627
struct FeatureDistribution {
17-
distributions: Vec<UniformNormalDistribution>
28+
distributions: Vec<NormalDistribution>
1829
}
1930

31+
/// Univariate Normal Distribution
2032
#[derive(Copy, Clone)]
21-
struct UniformNormalDistribution {
33+
struct NormalDistribution {
2234
sum: f64,
2335
sqsum: f64,
2436
n: usize
@@ -40,7 +52,7 @@ where
4052

4153
for (i, &xi) in x.into_iter().enumerate() {
4254
if i >= distributions.len() {
43-
distributions.resize(1 + i, UniformNormalDistribution::new());
55+
distributions.resize(1 + i, NormalDistribution::new());
4456
}
4557

4658
distributions[i].update(xi);
@@ -56,6 +68,7 @@ where
5668
impl<C> NaiveBayesClassifier<C>
5769
where C: Eq + Hash + Copy,
5870
{
71+
/// Predict the target class for a single feature vector.
5972
pub fn predict(&self, x: &[f64]) -> C {
6073
self.class_distributions
6174
.iter()
@@ -88,9 +101,9 @@ impl FeatureDistribution {
88101
}
89102
}
90103

91-
impl UniformNormalDistribution {
104+
impl NormalDistribution {
92105
fn new() -> Self {
93-
UniformNormalDistribution {
106+
NormalDistribution {
94107
sum: 0.0,
95108
sqsum: 0.0,
96109
n: 0
@@ -120,7 +133,7 @@ impl UniformNormalDistribution {
120133
}
121134
}
122135

123-
impl fmt::Debug for UniformNormalDistribution {
136+
impl fmt::Debug for NormalDistribution {
124137
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
125138
write!(f, "N{{{}; {}}}", self.mean(), self.variance())
126139
}

src/dataset.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
use arff::dynamic::DataSet as ArffDataSet;
22

3+
/// An arbitrary data set
34
#[derive(Debug)]
45
pub(crate) struct DataSet {
56
pub(crate) arff: ArffDataSet,
67
pub(crate) target: Option<String>,
78
}
89

910
impl DataSet {
11+
/// Return two `ArffDataSet`s: one containing the features and the other containing the target
12+
/// variable.
1013
pub(crate) fn clone_split(&self) -> Option<(ArffDataSet, ArffDataSet)> {
1114
match self.target {
1215
None => None,

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
//! # openml-rust
22
//!
3+
//! The openml crate provides functions to fetch tasks and data sets from <https://openml.org> and
4+
//! run them with machine learning models.
5+
//!
36
//! ## Example
47
//!
58
//! ```rust

src/measure_accumulator.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,29 @@
1+
//! Measure accumulators are summaries of model performance, such as classification accuracy or
2+
//! regression error.
3+
14
use std::marker::PhantomData;
25
use num_traits::AsPrimitive;
36

7+
/// Trait implemented by performance measures
48
pub trait MeasureAccumulator<T> {
9+
/// initialize new measure
510
fn new() -> Self;
11+
12+
/// update with one prediction
613
fn update_one(&mut self, known: &T, pred: &T);
14+
15+
/// get resulting performance
716
fn result(&self) -> f64;
817

18+
/// update with multiple predictions
919
fn update<I: Iterator<Item = T>>(&mut self, known: I, predicted: I) {
1020
for (k, p) in known.zip(predicted) {
1121
self.update_one(&k, &p)
1222
}
1323
}
1424
}
1525

26+
/// Classification accuracy: fraction of correctly classified labels
1627
#[derive(Debug)]
1728
pub struct PredictiveAccuracy<T> {
1829
n_correct: usize,
@@ -45,6 +56,7 @@ where
4556
}
4657
}
4758

59+
/// Root Mean Squared Error
4860
#[derive(Debug)]
4961
pub struct RootMeanSquaredError<T> {
5062
sum_of_squares: f64,

src/openml_api/api_types.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use serde_json;
22

3+
/// Generic JSON response as returned by the OpenML API
34
#[derive(Debug, Serialize, Deserialize)]
45
pub struct GenericResponse(serde_json::Value);
56

@@ -10,8 +11,9 @@ impl GenericResponse {
1011
}
1112
}
1213

14+
/// A row in a split file
1315
#[derive(Debug, Deserialize)]
14-
pub struct CrossValItem {
16+
pub(crate) struct CrossValItem {
1517
#[serde(rename = "type")]
1618
pub purpose: TrainTest,
1719

@@ -23,14 +25,15 @@ pub struct CrossValItem {
2325
}
2426

2527
#[derive(Debug, Deserialize)]
26-
pub enum TrainTest {
28+
pub(crate) enum TrainTest {
2729
#[serde(rename = "TRAIN")]
2830
Train,
2931

3032
#[serde(rename = "TEST")]
3133
Test,
3234
}
3335

36+
/// Cost matrix, used by some classification tasks - currently UNIMPLEMENTED
3437
#[derive(Debug)]
3538
pub(crate) enum CostMatrix {
3639
None,

src/openml_api/file_lock.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,46 @@
1+
//! File locking mechanisms.
2+
13
use std::fs::File;
24
use std::io::{self, Read, Write};
35

46
use fs2::FileExt;
57

8+
/// A scoped exclusive lock for use by file writers
69
pub struct ExclusiveLock {
710
file: File,
811
}
912

1013
impl ExclusiveLock {
14+
/// acquire locked file
1115
pub fn new(file: File) -> io::Result<Self> {
1216
file.lock_exclusive()?;
1317
Ok(ExclusiveLock { file })
1418
}
1519
}
1620

1721
impl Drop for ExclusiveLock {
22+
/// release locked file
1823
fn drop(&mut self) {
1924
self.file.unlock().unwrap();
2025
}
2126
}
2227

2328
impl Read for ExclusiveLock {
29+
/// read from locked file
2430
#[inline(always)]
2531
fn read(&mut self, data: &mut [u8]) -> io::Result<usize> {
2632
self.file.read(data)
2733
}
2834
}
2935

3036
impl Write for ExclusiveLock {
37+
/// write to locked file
3138
#[inline(always)]
3239
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
3340
self.file.write(data)
3441
}
3542

43+
/// flush buffer of locked file
3644
#[inline(always)]
3745
fn flush(&mut self) -> io::Result<()> {
3846
self.file.flush()
@@ -43,34 +51,26 @@ pub struct SharedLock {
4351
file: File,
4452
}
4553

54+
/// A scoped shared lock for use by file readers
4655
impl SharedLock {
56+
/// acquire locked file
4757
pub fn new(file: File) -> io::Result<Self> {
4858
file.lock_shared()?;
4959
Ok(SharedLock { file })
5060
}
5161
}
5262

5363
impl Drop for SharedLock {
64+
/// release locked file
5465
fn drop(&mut self) {
5566
self.file.unlock().unwrap();
5667
}
5768
}
5869

5970
impl Read for SharedLock {
71+
/// read from locked file
6072
#[inline(always)]
6173
fn read(&mut self, data: &mut [u8]) -> io::Result<usize> {
6274
self.file.read(data)
6375
}
6476
}
65-
66-
impl Write for SharedLock {
67-
#[inline(always)]
68-
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
69-
self.file.write(data)
70-
}
71-
72-
#[inline(always)]
73-
fn flush(&mut self) -> io::Result<()> {
74-
self.file.flush()
75-
}
76-
}

src/openml_api/impls_from_json.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
//! Implementations to convert the API's JSON responses into corresponding Rust structures.
12
use arff;
23
use arff::dynamic::DataSet as ArffDataSet;
34
use serde_json;

src/openml_api/impls_from_openml.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
//! implementations to load tasks from the OpenML API.
12
use serde_json;
23

34
use error::Result;

0 commit comments

Comments
 (0)