66import java .util .Arrays ;
77import java .util .List ;
88
9+ import aima .learning .framework .DataSet ;
10+ import aima .learning .framework .Example ;
11+ import aima .learning .statistics .Numerizer ;
12+ import aima .util .Pair ;
913import aima .util .Util ;
1014
1115public abstract class NNDataSet {
@@ -21,7 +25,7 @@ public abstract class NNDataSet {
2125 /*
2226 * a copy from which examples are drawn.
2327 */
24- private List <NNExample > presentlyProcessed ;
28+ private List <NNExample > presentlyProcessed = new ArrayList < NNExample >(); ;
2529
2630 /*
2731 * list of mean Values for all components of raw data set
@@ -35,7 +39,7 @@ public abstract class NNDataSet {
3539 /*
3640 * the normalized data set
3741 */
38- private List <List <Double >> nds ;
42+ protected List <List <Double >> nds ;
3943
4044 /*
4145 * the column numbers of the "target"
@@ -91,6 +95,21 @@ public void createNormalizedDataFromFile(String filename) throws Exception {
9195 nds = normalize (rds );
9296 }
9397
98+ /*
99+ * create a normalized data "table" from the DataSet using numerizer. At
100+ * this stage, the data is *not* split into input pattern and targets TODO
101+ * remove redundancy of recreating the target columns. the numerizer has
102+ * already isolated the targets
103+ */
104+
105+ public void createNormalizedDataFromDataSet (DataSet ds , Numerizer numerizer )
106+ throws Exception {
107+
108+ List <List <Double >> rds = rawExamplesFromDataSet (ds , numerizer );
109+ // normalize raw dataset
110+ nds = normalize (rds );
111+ }
112+
94113 private List <List <Double >> normalize (List <List <Double >> rds ) {
95114 int rawDataLength = rds .get (0 ).size ();
96115 List <List <Double >> nds = new ArrayList <List <Double >>();
@@ -140,6 +159,27 @@ private List<Double> exampleFromString(String line, String separator) {
140159 return rexample ;
141160 }
142161
162+ private List <List <Double >> rawExamplesFromDataSet (DataSet ds ,
163+ Numerizer numerizer ) {
164+ // assumes all values for inout and target are doubles
165+ List <List <Double >> rds = new ArrayList <List <Double >>();
166+ for (int i = 0 ; i < ds .size (); i ++) {
167+ List <Double > rexample = new ArrayList <Double >();
168+ Example e = ds .getExample (i );
169+ Pair <List <Double >, List <Double >> p = numerizer .numerize (e );
170+ List <Double > attributes = p .getFirst ();
171+ for (Double d : attributes ) {
172+ rexample .add (d );
173+ }
174+ List <Double > targets = p .getSecond ();
175+ for (Double d : targets ) {
176+ rexample .add (d );
177+ }
178+ rds .add (rexample );
179+ }
180+ return rds ;
181+ }
182+
143183 /*
144184 * Gets (and removes) a random example from the 'presentlyProcessed'
145185 */
@@ -149,6 +189,14 @@ public NNExample getExampleAtRandom() {
149189 return presentlyProcessed .remove (i );
150190 }
151191
192+ /*
193+ * Gets (and removes) a random example from the 'presentlyProcessed'
194+ */
195+ public NNExample getExample (int index ) {
196+
197+ return presentlyProcessed .remove (index );
198+ }
199+
152200 /*
153201 * check if any more examples remain to be processed
154202 */
@@ -185,6 +233,18 @@ public void createExamplesFromFile(String filename) throws Exception {
185233
186234 }
187235
236+ /*
237+ * method called by clients to set up data set and make it ready for
238+ * processing
239+ */
240+ public void createExamplesFromDataSet (DataSet ds , Numerizer numerizer )
241+ throws Exception {
242+ createNormalizedDataFromDataSet (ds , numerizer );
243+ setTargetColumns ();
244+ createExamples ();
245+
246+ }
247+
188248 public List <List <Double >> getNormalizedData () {
189249 return nds ;
190250 }
0 commit comments