99using Meta . Numerics . Statistics ;
1010using Meta . Numerics . Statistics . Distributions ;
1111
12+ using Newtonsoft . Json ;
13+
1214namespace Examples {
1315
1416 public static class Data {
1517
/// <summary>
/// Writes a small CSV file named "test.csv" (one header line and three data rows)
/// into the current working directory.
/// </summary>
public static void ConstructTestCsv() {

    // File.CreateText creates the file or truncates an existing one. The previous
    // File.OpenWrite call opened an existing file without truncating it, so a longer
    // pre-existing test.csv would keep stale trailing content after these rows.
    using (TextWriter writer = File.CreateText("test.csv")) {
        writer.WriteLine("Id, Name, Sex, Birthdate, Height, Weight, Result");
        writer.WriteLine("1, John, M, 1970-01-02, 190.0, 75.0, True");
        writer.WriteLine("2, Mary, F, 1980-02-03, 155.0, 40.0, True");
        writer.WriteLine("3, Luke, M, 1990-03-04, 180.0, 60.0, False");
    }

}
28+
/// <summary>
/// Demonstrates several ways of getting data into a <c>FrameTable</c>: from a local
/// CSV file, from a CSV file fetched over HTTP, from JSON deserialized into a list of
/// dictionaries, and by defining a schema and adding rows programmatically.
/// </summary>
/// <remarks>
/// Reads "test.csv" from the current working directory and downloads two remote
/// resources, so it requires that file to exist and network access to be available.
/// </remarks>
[ExampleMethod]
public static void ImportingData() {

    // Import from a local CSV file.
    FrameTable data;
    using (TextReader reader = File.OpenText("test.csv")) {
        data = FrameTable.FromCsv(reader);
    }

    Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
    Console.WriteLine("The names and types of the columns are:");
    foreach (FrameColumn column in data.Columns) {
        Console.WriteLine($"  {column.Name} of type {column.StorageType}");
    }

    // Import from a CSV file fetched over HTTP.
    FrameTable titanic;
    Uri url = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse()) {
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            titanic = FrameTable.FromCsv(reader);
        }
    }

    // Import from JSON: download the text, deserialize it into a list of
    // dictionaries (one per row), and build the table from those.
    Uri jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
    string input;
    // WebClient is IDisposable; dispose it as soon as the download completes.
    using (WebClient client = new WebClient()) {
        input = client.DownloadString(jsonUrl);
    }
    List<Dictionary<string, object>> output = JsonConvert.DeserializeObject<List<Dictionary<string, object>>>(input);
    FrameTable jsonExample = FrameTable.FromDictionaries(output);

    // Define the schema.
    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Add rows as arrays of objects.
    table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
    table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

    // Add a row using a dictionary. This is more verbose, but very clear.
    table.AddRow(new Dictionary<string, object>() {
        {"Id", 3},
        {"Name", null},
        {"Sex", "M"},
        {"Birthdate", DateTime.Parse("1990-03-04")},
        {"Height", 180.0},
        {"Weight", 60.0},
        {"Result", false}
    });

}
84+
1685 [ ExampleMethod ]
1786 public static void ManipulatingData ( ) {
1887
@@ -82,8 +151,18 @@ public static void AnalyzingData () {
82151 table = FrameTable . FromCsv ( reader ) ;
83152 }
84153 }
154+ FrameView view = table . WhereNotNull ( ) ;
155+
156+ // Get the column with (zero-based) index 4.
157+ FrameColumn column4 = view . Columns [ 4 ] ;
158+ // Get the column named "Height".
159+ FrameColumn heightsColumn = view . Columns [ "Height" ] ;
160+ // Even easier way to get the column named "Height".
161+ FrameColumn alsoHeightsColumn = view [ "Height" ] ;
85162
86- SummaryStatistics summary = new SummaryStatistics ( table [ "Height" ] . As < double > ( ) ) ;
163+ IReadOnlyList < double > heights = view [ "Height" ] . As < double > ( ) ;
164+
165+ SummaryStatistics summary = new SummaryStatistics ( view [ "Height" ] . As < double > ( ) ) ;
87166 Console . WriteLine ( $ "Count = { summary . Count } ") ;
88167 Console . WriteLine ( $ "Mean = { summary . Mean } ") ;
89168 Console . WriteLine ( $ "Standard Deviation = { summary . StandardDeviation } ") ;
@@ -92,74 +171,77 @@ public static void AnalyzingData () {
92171 Console . WriteLine ( $ "Estimated population standard deviation = { summary . PopulationStandardDeviation } ") ;
93172
94173 IReadOnlyList < double > maleHeights =
95- table . Where < string > ( "Sex" , s => s == "M" ) . Columns [ "Height" ] . As < double > ( ) ;
174+ view . Where < string > ( "Sex" , s => s == "M" ) . Columns [ "Height" ] . As < double > ( ) ;
96175 IReadOnlyList < double > femaleHeights =
97- table . Where < string > ( "Sex" , s => s == "F" ) . Columns [ "Height" ] . As < double > ( ) ;
176+ view . Where < string > ( "Sex" , s => s == "F" ) . Columns [ "Height" ] . As < double > ( ) ;
98177 TestResult test = Univariate . StudentTTest ( maleHeights , femaleHeights ) ;
99- Console . WriteLine ( $ "{ test . Statistic . Name } = { test . Statistic . Value } , P = { test . Probability } ") ;
178+ Console . WriteLine ( $ "{ test . Statistic . Name } = { test . Statistic . Value } ") ;
179+ Console . WriteLine ( $ "P = { test . Probability } ") ;
100180
101181 TestResult maleHeightNormality = maleHeights . ShapiroFranciaTest ( ) ;
102- TestResult totalHeightNormality = table [ "Height" ] . As < double > ( ) . ShapiroFranciaTest ( ) ;
182+ TestResult totalHeightNormality = view [ "Height" ] . As < double > ( ) . ShapiroFranciaTest ( ) ;
103183 TestResult heightCompatibility = Univariate . KolmogorovSmirnovTest ( maleHeights , femaleHeights ) ;
104184
105185 LinearRegressionResult fit =
106- table [ "Weight" ] . As < double > ( ) . LinearRegression ( table [ "Height" ] . As < double > ( ) ) ;
186+ view [ "Weight" ] . As < double > ( ) . LinearRegression ( view [ "Height" ] . As < double > ( ) ) ;
107187 Console . WriteLine ( $ "Model weight = ({ fit . Slope } ) * height + ({ fit . Intercept } ).") ;
108188 Console . WriteLine ( $ "Model explains { fit . RSquared * 100.0 } % of variation.") ;
109189
110190 ContingencyTable < string , bool > contingency =
111- Bivariate . Crosstabs ( table [ "Sex" ] . As < string > ( ) , table [ "Result" ] . As < bool > ( ) ) ;
191+ Bivariate . Crosstabs ( view [ "Sex" ] . As < string > ( ) , view [ "Result" ] . As < bool > ( ) ) ;
112192 Console . WriteLine ( $ "Male incidence: { contingency . ProbabilityOfColumnConditionalOnRow ( true , "M" ) } ") ;
113- Console . WriteLine ( $ "Female incidence: { contingency . ProbabilityOfColumnConditionalOnRow ( false , "F" ) } ") ;
193+ Console . WriteLine ( $ "Female incidence: { contingency . ProbabilityOfColumnConditionalOnRow ( true , "F" ) } ") ;
114194 Console . WriteLine ( $ "Log odds ratio = { contingency . Binary . LogOddsRatio } ") ;
115195
116- table . AddComputedColumn ( "Bmi" , r => ( ( double ) r [ "Weight" ] ) / MoreMath . Sqr ( ( double ) r [ "Height" ] / 100.0 ) ) ;
117- table . AddComputedColumn ( "Age" , r=> ( DateTime . Now - ( DateTime ) r [ "Birthdate" ] ) . TotalDays / 365.24 ) ;
196+ view . AddComputedColumn ( "Bmi" , r => ( ( double ) r [ "Weight" ] ) / MoreMath . Sqr ( ( double ) r [ "Height" ] / 100.0 ) ) ;
197+ view . AddComputedColumn ( "Age" , r=> ( DateTime . Now - ( DateTime ) r [ "Birthdate" ] ) . TotalDays / 365.24 ) ;
118198
119199 MultiLinearLogisticRegressionResult result =
120- table [ "Result" ] . As < bool > ( ) . MultiLinearLogisticRegression (
121- table [ "Bmi" ] . As < double > ( ) ,
122- table [ "Sex" ] . As < string , double > ( s => s == "M" ? 1.0 : 0.0 )
200+ view [ "Result" ] . As < bool > ( ) . MultiLinearLogisticRegression (
201+ view [ "Bmi" ] . As < double > ( ) ,
202+ view [ "Sex" ] . As < string , double > ( s => s == "M" ? 1.0 : 0.0 )
123203 ) ;
124204 foreach ( Parameter parameter in result . Parameters ) {
125205 Console . WriteLine ( $ "{ parameter . Name } = { parameter . Estimate } ") ;
126206 }
127207
128- //TestResult ageResultPearson = Bivariate.PearsonRTest(table["Age"].As<double>(), table["Result"].As<double>());
129- TestResult spearman = Bivariate . SpearmanRhoTest ( table [ "Age" ] . As < double > ( ) , table [ "Result" ] . As < double > ( ) ) ;
208+ TestResult spearman = Bivariate . SpearmanRhoTest ( view [ "Age" ] . As < double > ( ) , view [ "Result" ] . As < double > ( ) ) ;
130209 Console . WriteLine ( $ "{ spearman . Statistic . Name } = { spearman . Statistic . Value } P = { spearman . Probability } ") ;
131210
132211 }
133212
/// <summary>
/// Builds the example data set and writes it to "example.csv" and "example.json"
/// in the current working directory.
/// </summary>
/// <remarks>
/// Rows for the listed male and female names are produced by <c>AddRows</c> using a
/// fixed-seed <see cref="Random"/>. Two further rows containing a null name and a
/// null weight are then appended.
/// </remarks>
public static void ConstructExampleData() {

    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    // Weight is nullable so that rows with a missing weight can be represented.
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Fixed seed for the generator handed to AddRows.
    Random rng = new Random(1000001);

    // The numeric arguments map to AddRows' meanHeight, stddevHeight, meanBmi,
    // stddevBmi and flag parameters, in that order.
    string[] maleNames = new string[] { "Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
    AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

    string[] femaleNames = new string[] { "Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
    AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

    // Add rows with nulls; the current row count is used as the Id value.
    table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false);
    table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false);

    string path = @"example.csv";
    // File.CreateText truncates an existing file; File.OpenWrite would leave stale
    // trailing content behind if a longer example.csv already existed.
    using (StreamWriter writer = File.CreateText(path)) {
        table.ToCsv(writer);
    }
    Console.WriteLine(File.Exists(path));

    string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);
    File.WriteAllText("example.json", json);

}
164246
165247 private static void AddRows ( FrameTable table , IReadOnlyList < string > names , string sex , double meanHeight , double stddevHeight , double meanBmi , double stddevBmi , int flag , Random rng ) {
0 commit comments