Skip to content

Commit 997b766

Browse files
Couple changes to the API
1 parent aa5dc35 commit 997b766

File tree

3 files changed

+103
-31
lines changed

3 files changed

+103
-31
lines changed

pom.xml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<modelVersion>4.0.0</modelVersion>
44
<groupId>io.frictionlessdata</groupId>
55
<artifactId>datapackage-java</artifactId>
6-
<version>0.5.1-SNAPSHOT</version>
6+
<version>0.6.0-SNAPSHOT</version>
77
<packaging>jar</packaging>
88
<issueManagement>
99
<url>https://github.com/frictionlessdata/datapackage-java/issues</url>
@@ -20,7 +20,7 @@
2020
<java.version>8</java.version>
2121
<maven.compiler.source>${java.version}</maven.compiler.source>
2222
<maven.compiler.target>${java.version}</maven.compiler.target>
23-
<tableschema-java-version>0.5.1</tableschema-java-version>
23+
<tableschema-java-version>0.6.0</tableschema-java-version>
2424
<hamcrest.version>1.3</hamcrest.version>
2525
<junit.version>5.9.1</junit.version>
2626
<slf4j-simple.version>2.0.5</slf4j-simple.version>
@@ -263,6 +263,12 @@
263263
<artifactId>tableschema-java</artifactId>
264264
<version>${tableschema-java-version}</version>
265265
</dependency>
266-
266+
<!--
267+
<dependency>
268+
<groupId>io.frictionlessdata</groupId>
269+
<artifactId>tableschema-java</artifactId>
270+
<version>0.5.1-SNAPSHOT</version>
271+
<scope>compile</scope>
272+
</dependency> -->
267273
</dependencies>
268274
</project>

src/main/java/io/frictionlessdata/datapackage/resource/AbstractResource.java

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ private Iterator<String[]> stringArrayIterator(boolean relations) throws Excepti
8080
Iterator[] tableIteratorArray = new TableIterator[tables.size()];
8181
int cnt = 0;
8282
for (Table table : tables) {
83-
tableIteratorArray[cnt++] = table.iterator(false, false, false, relations);
83+
tableIteratorArray[cnt++] = table.stringArrayIterator(relations);
8484
}
8585
return new IteratorChain<>(tableIteratorArray);
8686
}
@@ -91,7 +91,7 @@ public Iterator<String[]> stringArrayIterator() throws Exception{
9191
Iterator<String[]>[] tableIteratorArray = new TableIterator[tables.size()];
9292
int cnt = 0;
9393
for (Table table : tables) {
94-
tableIteratorArray[cnt++] = table.stringArrayIterator(false);
94+
tableIteratorArray[cnt++] = table.stringArrayIterator();
9595
}
9696
return new IteratorChain<>(tableIteratorArray);
9797
}
@@ -102,7 +102,7 @@ public Iterator<Map<String, Object>> mappingIterator(boolean relations) throws E
102102
Iterator<Map<String, Object>>[] tableIteratorArray = new TableIterator[tables.size()];
103103
int cnt = 0;
104104
for (Table table : tables) {
105-
tableIteratorArray[cnt++] = table.keyedIterator(false, true, relations);
105+
tableIteratorArray[cnt++] = table.mappingIterator(false, true, relations);
106106
}
107107
return new IteratorChain(tableIteratorArray);
108108
}
@@ -117,17 +117,46 @@ public Iterator<C> beanIterator(Class<C> beanType, boolean relations) throws Exc
117117
return ic;
118118
}
119119

120+
/**
121+
* Read all data from a Resource, each row as String arrays. This can be used for smaller datapackages,
122+
* but for huge or unknown sizes, reading via iterator is preferred, as this method loads all data into RAM.
123+
*
124+
* It can be configured to return table rows with relations to other data sources resolved
125+
*
126+
* The method uses Iterators provided by {@link Table} class, and is roughly implemented after
127+
* https://github.com/frictionlessdata/tableschema-py/blob/master/tableschema/table.py
128+
*
129+
* @param relations true: follow relations
130+
* @return A list of table rows.
131+
* @throws Exception if parsing the data fails
132+
*
133+
*/
120134
@JsonIgnore
121-
public List<String[]> getData() throws Exception{
135+
public List<String[]> getData(boolean relations) throws Exception{
122136
List<String[]> retVal = new ArrayList<>();
123137
ensureDataLoaded();
124-
Iterator<String[]> iter = stringArrayIterator();
138+
Iterator<String[]> iter = stringArrayIterator(relations);
125139
while (iter.hasNext()) {
126140
retVal.add(iter.next());
127141
}
128142
return retVal;
129143
}
130144

145+
/**
146+
* Read all data from a Resource, each row as Map objects. This can be used for smaller datapackages,
147+
* but for huge or unknown sizes, reading via iterator is preferred, as this method loads all data into RAM.
148+
*
149+
* The method returns Map&lt;String,Object&gt; where key is the header name, and val is the data.
150+
* It can be configured to return table rows with relations to other data sources resolved
151+
*
152+
* The method uses Iterators provided by {@link Table} class, and is roughly implemented after
153+
* https://github.com/frictionlessdata/tableschema-py/blob/master/tableschema/table.py
154+
*
155+
* @param relations true: follow relations
156+
* @return A list of table rows.
157+
* @throws Exception if parsing the data fails
158+
*
159+
*/
131160
@Override
132161
public List<Map<String, Object>> getMappedData(boolean relations) throws Exception {
133162
List<Map<String, Object>> retVal = new ArrayList<>();
@@ -148,13 +177,20 @@ public List<Map<String, Object>> getMappedData(boolean relations) throws Excepti
148177
* Most customizable method to retrieve all data in a Resource. Parameters match those in
149178
* {@link io.frictionlessdata.tableschema.Table#iterator(boolean, boolean, boolean, boolean)}. Data can be
150179
* returned as:
151-
*
152-
* - String arrays,
153-
* - as Object arrays (parameter `cast` = true),
154-
* - as a Map&lt;key,val&gt; where key is the header name, and val is the data (parameter `keyed` = true),
155-
* - or in an "extended" form (parameter `extended` = true) that returns an Object array where the first entry is the
180+
* <ul>
181+
* <li>String arrays,</li>
182+
* <li>as Object arrays (parameter `cast` = true),</li>
183+
* <li>as a Map&lt;String,Object&gt; where key is the header name, and val is the data (parameter `keyed` = true),</li>
184+
* <li>or in an "extended" form (parameter `extended` = true) that returns an Object array where the first entry is the
156185
* row number, the second is a String array holding the headers, and the third is an Object array holding
157-
* the row data.
186+
* the row data.</li>
187+
*</ul>
188+
* The following rules apply:
189+
* <ul>
190+
* <li>if no Schema is present, rows are always returned as Strings, not Objects, as if `cast` were always off</li>
191+
* <li>if `extended` is true, then `cast` is also true, but `keyed` is false</li>
192+
* <li>if `keyed` is true, then `cast` is also true, but `extended` is false</li>
193+
* </ul>
158194
* @param keyed returns data as Maps
159195
* @param extended returns data in "extended form"
160196
* @param cast returns data as Objects, not Strings
@@ -166,13 +202,15 @@ public List<Object> getData(boolean keyed, boolean extended, boolean cast, boole
166202
List<Object> retVal = new ArrayList<>();
167203
ensureDataLoaded();
168204
Iterator iter;
169-
if (cast) {
205+
if (keyed) {
206+
iter = mappingIterator(relations);
207+
} else if (cast) {
170208
iter = objectArrayIterator(extended, relations);
171209
} else {
172210
iter = stringArrayIterator(relations);
173211
}
174212
while (iter.hasNext()) {
175-
retVal.add((Object[])iter.next());
213+
retVal.add(iter.next());
176214
}
177215
return retVal;
178216
}

src/main/java/io/frictionlessdata/datapackage/resource/Resource.java

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package io.frictionlessdata.datapackage.resource;
22

3+
import com.fasterxml.jackson.annotation.JsonIgnore;
34
import com.fasterxml.jackson.databind.JsonNode;
45
import com.fasterxml.jackson.databind.node.ArrayNode;
56
import com.fasterxml.jackson.databind.node.ObjectNode;
@@ -44,9 +45,26 @@ public interface Resource<T,C> {
4445

4546
String getJson();
4647

48+
/**
49+
* Read all data from a Resource, each row as String arrays. This can be used for smaller datapackages,
50+
* but for huge or unknown sizes, reading via iterator is preferred, as this method loads all data into RAM.
51+
*
52+
* It can be configured to return table rows with relations to other data sources resolved
53+
*
54+
* The method uses Iterators provided by {@link Table} class, and is roughly implemented after
55+
* https://github.com/frictionlessdata/tableschema-py/blob/master/tableschema/table.py
56+
*
57+
* @param relations true: follow relations
58+
* @return A list of table rows.
59+
* @throws Exception if parsing the data fails
60+
*
61+
*/
62+
@JsonIgnore
63+
public List<String[]> getData(boolean relations) throws Exception;
64+
4765
/**
4866
* Read all data from a Resource, each row as Map objects. This can be used for smaller datapackages,
49-
* but for huge or unknown sizes, reading via iterator is preferred.
67+
* but for huge or unknown sizes, reading via iterator is preferred, as this method loads all data into RAM.
5068
*
5169
* The method returns Map&lt;String,Object&gt; where key is the header name, and val is the data.
5270
* It can be configured to return table rows with relations to other data sources resolved
@@ -62,14 +80,17 @@ public interface Resource<T,C> {
6280
List<Map<String, Object>> getMappedData(boolean relations) throws Exception;
6381

6482
/**
65-
* Read all data from a Resource. This can be used for smaller datapackages, but for huge or unknown
66-
* sizes, reading via iterator is preferred.
83+
* Most customizable method to retrieve all data in a Resource. Parameters match those in
84+
* {@link io.frictionlessdata.tableschema.Table#iterator(boolean, boolean, boolean, boolean)}.
85+
* This can be used for smaller datapackages, but for huge or unknown
86+
* sizes, reading via iterator is preferred, as this method loads all data into RAM.
6787
*
6888
* The method can be configured to return table rows as:
6989
* <ul>
7090
* <li>String arrays (parameter `cast` = false)</li>
7191
* <li>as Object arrays (parameter `cast` = true)</li>
72-
* <li>as a Map&lt;key,val&gt; where key is the header name, and val is the data (parameter `keyed` = true)</li>
92+
* <li>as a Map&lt;String,Object&gt; where key is the header name, and val is
93+
* the data (parameter `keyed` = true)</li>
7394
* <li>in an "extended" form (parameter `extended` = true) that returns an Object array where the first entry
7495
* is the row number, the second is a String array holding the headers,
7596
* and the third is an Object array holding the row data.</li>
@@ -91,7 +112,8 @@ public interface Resource<T,C> {
91112

92113
/**
93114
* Read all data from a Resource. This can be used for smaller datapackages, but for huge or unknown
94-
* sizes, reading via iterator is preferred. The method ignores relations.
115+
* sizes, reading via iterator is preferred, as this method loads all data into RAM.
116+
* The method ignores relations.
95117
*
96118
* Returns as a List of Java objects of the type `beanClass`. Under the hood, it uses a {@link TableIterator}
97119
* for reading based on a Java Bean class instead of a {@link io.frictionlessdata.tableschema.schema.Schema}.
@@ -124,23 +146,27 @@ public interface Resource<T,C> {
124146
void writeSchema(Path parentFilePath) throws IOException;
125147

126148
/**
127-
* Returns an Iterator that returns rows as object-arrays
128-
* @return Row iterator
149+
* Returns an Iterator that returns rows as object-arrays. Values in each column
150+
* are parsed and converted ("cast") to Java objects based on the Field definitions of the Schema.
151+
* @return Iterator returning table rows as Object Arrays
129152
* @throws Exception if parsing the data fails
130153
*/
131154
Iterator<Object[]> objectArrayIterator() throws Exception;
132155

133156
/**
134-
* Returns an Iterator that returns rows as object-arrays
135-
* @return Row Iterator
157+
* Returns an Iterator that returns rows as object-arrays. Values in each column
158+
* are parsed and converted ("cast") to Java objects based on the Field definitions of the Schema.
159+
* @return Iterator returning table rows as Object Arrays
136160
* @throws Exception if parsing the data fails
137161
*/
138162
Iterator<Object[]> objectArrayIterator(boolean extended, boolean relations) throws Exception;
139163

140-
141164
/**
142-
* Returns an Iterator that returns rows as a Map&lt;key,val&gt; where key is the header name, and val is the data
143-
* @return Row Iterator
165+
* Returns an Iterator that returns rows as a Map&lt;String,Object&gt; where key is the header name, and val is the data.
166+
* It can be configured to follow relations.
167+
*
168+
* @param relations Whether references to other data sources get resolved
169+
* @return Iterator that returns rows as Maps.
144170
* @throws Exception if parsing the data fails
145171
*/
146172
Iterator<Map<String, Object>> mappingIterator(boolean relations) throws Exception;
@@ -156,10 +182,12 @@ public interface Resource<T,C> {
156182
* @param relations follow relations to other data source
157183
*/
158184
Iterator<C> beanIterator(Class<C> beanType, boolean relations)throws Exception;
185+
159186
/**
160-
* Returns an Iterator that returns rows as string-arrays
161-
* @return Row Iterator
162-
* @throws Exception if parsing the data fails
187+
* This method creates an Iterator that will return table rows as String arrays.
188+
* It therefore disregards the Schema set on the table. It does not follow relations.
189+
*
190+
* @return Iterator that returns rows as string arrays.
163191
*/
164192
public Iterator<String[]> stringArrayIterator() throws Exception;
165193

0 commit comments

Comments
 (0)