Skip to content

Commit a130620

Browse files
Better support and tests for non-tabular resources
1 parent d7a45da commit a130620

File tree

14 files changed

+263
-136
lines changed

14 files changed

+263
-136
lines changed

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<modelVersion>4.0.0</modelVersion>
55
<groupId>io.frictionlessdata</groupId>
66
<artifactId>datapackage-java</artifactId>
7-
<version>0.8.3-SNAPSHOT</version>
7+
<version>0.9.0-SNAPSHOT</version>
88
<packaging>jar</packaging>
99
<issueManagement>
1010
<url>https://github.com/frictionlessdata/datapackage-java/issues</url>
@@ -23,7 +23,7 @@
2323
<maven.compiler.source>${java.version}</maven.compiler.source>
2424
<maven.compiler.target>${java.version}</maven.compiler.target>
2525
<maven.compiler.compiler>${java.version}</maven.compiler.compiler>
26-
<tableschema-java-version>0.8.3</tableschema-java-version>
26+
<tableschema-java-version>0.9.0</tableschema-java-version>
2727
<junit.version>5.12.0</junit.version>
2828
<slf4j-simple.version>2.0.17</slf4j-simple.version>
2929
<apache-commons-collections4.version>4.4</apache-commons-collections4.version>

src/main/java/io/frictionlessdata/datapackage/Dialect.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public class Dialect implements Cloneable {
4949
public static final Dialect DEFAULT = new Dialect(){
5050
private JsonNode JsonNode;
5151

52-
public String getJson() {
52+
public String asJson() {
5353
lazyCreate();
5454
return JsonNode.toString();
5555
}
@@ -225,7 +225,7 @@ public static Dialect fromJson(String json) {
225225
* @return a String representing the properties of this object encoded as JSON
226226
*/
227227
@JsonIgnore
228-
public String getJson() {
228+
public String asJson() {
229229
return getJsonNode(true).toString();
230230
}
231231

@@ -242,7 +242,7 @@ public void writeJson (File outputFile) throws IOException{
242242

243243
public void writeJson (OutputStream output) throws IOException{
244244
try (BufferedWriter file = new BufferedWriter(new OutputStreamWriter(output, StandardCharsets.UTF_8))) {
245-
file.write(this.getJson());
245+
file.write(this.asJson());
246246
}
247247
}
248248

@@ -253,7 +253,7 @@ public void writeDialect(Path parentFilePath) throws IOException {
253253
}
254254
Files.deleteIfExists(parentFilePath);
255255
try (Writer wr = Files.newBufferedWriter(parentFilePath, StandardCharsets.UTF_8)) {
256-
wr.write(getJson());
256+
wr.write(asJson());
257257
}
258258
}
259259

src/main/java/io/frictionlessdata/datapackage/Package.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import io.frictionlessdata.tableschema.exception.JsonParsingException;
1919
import io.frictionlessdata.tableschema.exception.ValidationException;
2020
import io.frictionlessdata.tableschema.io.LocalFileReference;
21-
import io.frictionlessdata.tableschema.schema.Schema;
2221
import io.frictionlessdata.tableschema.util.JsonUtil;
2322
import org.apache.commons.collections.list.UnmodifiableList;
2423
import org.apache.commons.collections.set.UnmodifiableSet;
@@ -384,6 +383,18 @@ public Object getProperty(String key, TypeReference<?> typeRef) {
384383
* @return JSON-String representation of the Package
385384
*/
386385
@JsonIgnore
386+
public String asJson(){
387+
return getJsonNode().toPrettyString();
388+
}
389+
390+
/**
391+
* Convert both the descriptor and all linked Resources to JSON and return them.
392+
* @return JSON-String representation of the Package
393+
*
394+
* @deprecated use {@link #asJson()} instead.
395+
*/
396+
@Deprecated
397+
@JsonIgnore
387398
public String getJson(){
388399
return getJsonNode().toPrettyString();
389400
}
@@ -711,7 +722,7 @@ final URL getBaseUrl(){
711722
}
712723

713724
@JsonIgnore
714-
protected ObjectNode getJsonNode(){
725+
private ObjectNode getJsonNode(){
715726
ObjectNode objectNode = (ObjectNode) JsonUtil.getInstance().createNode(this);
716727
// update any manually set properties
717728
this.jsonObject.fields().forEachRemaining(f->{

src/main/java/io/frictionlessdata/datapackage/fk/PackageForeignKey.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import java.util.List;
1414

1515
/**
16-
* PackageForeignKey is a wrapper around the ForeignKey class to validate foreign keys
16+
* PackageForeignKey is a wrapper around the {@link io.frictionlessdata.tableschema.fk.ForeignKey} class to validate foreign keys
1717
* in the context of a data package. It checks if the referenced resource and fields exist
1818
* in the data package and validates the foreign key constraints.
1919
*
@@ -36,9 +36,18 @@ public PackageForeignKey(ForeignKey fk, Resource<?> res, Package pkg) {
3636
this.fk = fk;
3737
}
3838

39+
/**
40+
* Formal validation of the foreign key. This method checks if the referenced resource and fields exist.
41+
* It does not check the actual data in the tables.
42+
*
43+
* Verification of table data against the foreign key constraints is done in
44+
* {@link io.frictionlessdata.datapackage.resource.AbstractResource#checkRelations}.
45+
*
46+
* @throws Exception if the foreign key relation is invalid.
47+
*/
3948
public void validate() throws Exception {
4049
Reference reference = fk.getReference();
41-
// self-reference, this can be validated by the Tableschema {@link ForeignKey} class
50+
// self-reference, this can be validated by the Tableschema {@link io.frictionlessdata.tableschema.fk.ForeignKey} class
4251
if (reference.getResource().equals("")) {
4352
for (Table table : resource.getTables()) {
4453
fk.validate(table);

src/main/java/io/frictionlessdata/datapackage/resource/AbstractDataResource.java

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import java.util.Set;
1616

1717
/**
18-
* Abstract base class for all Resources that are based on directly set data, that is not on
18+
* Abstract base class for all Resources that are based on directly set tabular data, that is, not on
1919
* data specified as files or URLs.
2020
*
2121
* @param <T> the data format, either CSV or JSON array
@@ -79,26 +79,6 @@ public Set<String> getDatafileNamesForWriting() {
7979
return names;
8080
}
8181

82-
/**
83-
* write out any resource to a CSV file. It creates a file with a file name taken from
84-
* the Resource name. Subclasses might override this to write data differently (eg. to the
85-
* same files it was read from.
86-
* @param outputDir the directory to write to.
87-
* @param dialect the CSV dialect to use for writing
88-
* @throws Exception thrown if writing fails.
89-
*/
90-
91-
public void writeDataAsCsv(Path outputDir, Dialect dialect) throws Exception {
92-
Dialect lDialect = (null != dialect) ? dialect : Dialect.DEFAULT;
93-
String fileName = super.getName()
94-
.toLowerCase()
95-
.replaceAll("\\W", "_")
96-
+".csv";
97-
List<Table> tables = getTables();
98-
Path p = outputDir.resolve(fileName);
99-
writeTableAsCsv(tables.get(0), lDialect, p);
100-
}
101-
10282
@JsonIgnore
10383
abstract String getResourceFormat();
10484
}

src/main/java/io/frictionlessdata/datapackage/resource/AbstractReferencebasedResource.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,16 +75,18 @@ public Collection<T> getPaths() {
7575
@Override
7676
@JsonIgnore
7777
public Set<String> getDatafileNamesForWriting() {
78-
List<String> paths = new ArrayList<>(((FilebasedResource)this).getReferencesAsStrings());
78+
List<String> paths = new ArrayList<>(this.getReferencesAsStrings());
7979
return paths.stream().map((p) -> {
8080
if (p.toLowerCase().endsWith("."+ TableDataSource.Format.FORMAT_CSV.getLabel())){
8181
int i = p.toLowerCase().indexOf("."+TableDataSource.Format.FORMAT_CSV.getLabel());
8282
return p.substring(0, i);
8383
} else if (p.toLowerCase().endsWith("."+TableDataSource.Format.FORMAT_JSON.getLabel())){
8484
int i = p.toLowerCase().indexOf("."+TableDataSource.Format.FORMAT_JSON.getLabel());
8585
return p.substring(0, i);
86+
} else {
87+
int i = p.lastIndexOf(".");
88+
return p.substring(0, i);
8689
}
87-
return p;
8890
}).collect(Collectors.toSet());
8991
}
9092

src/main/java/io/frictionlessdata/datapackage/resource/AbstractResource.java

Lines changed: 24 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.commons.csv.CSVFormat;
3535
import org.apache.commons.csv.CSVPrinter;
3636

37+
import java.io.FileOutputStream;
3738
import java.io.IOException;
3839
import java.io.Writer;
3940
import java.net.URI;
@@ -67,7 +68,7 @@ public abstract class AbstractResource<T> extends JSONBase implements Resource<T
6768
boolean serializeToFile = true;
6869

6970
@JsonIgnore
70-
private String serializationFormat;
71+
String serializationFormat;
7172

7273
@JsonIgnore
7374
final List<DataPackageValidationException> errors = new ArrayList<>();
@@ -398,7 +399,7 @@ public void checkRelations(Package pkg) {
398399
for (String key : row.keySet()) {
399400
for (PackageForeignKey fk : map.keySet()) {
400401
if (fk.getForeignKey().getFieldNames().contains(key)) {
401-
List<Object>refData = (List<Object>) map.get(fk);
402+
List<Object>refData = map.get(fk);
402403
Map<String, String> fieldMapping = fk.getForeignKey().getFieldMapping();
403404
String refFieldName = fieldMapping.get(key);
404405
Object fkVal = row.get(key);
@@ -413,7 +414,11 @@ public void checkRelations(Package pkg) {
413414
}
414415
}
415416
if (!found) {
416-
throw new ForeignKeyException("Foreign key validation failed: " + fk.getForeignKey().getFieldNames() + " -> " + fk.getForeignKey().getReference().getFieldNames() + ": '" + fkVal + "' not found in resource '"+fk.getForeignKey().getReference().getResource()+"'.");
417+
throw new ForeignKeyException("Foreign key validation failed: "
418+
+ fk.getForeignKey().getFieldNames() + " -> "
419+
+ fk.getForeignKey().getReference().getFieldNames() + ": '"
420+
+ fkVal + "' not found in resource '"
421+
+ fk.getForeignKey().getReference().getResource()+"'.");
417422
}
418423
}
419424
}
@@ -443,67 +448,6 @@ public void validate(Package pkg) {
443448
}
444449
}
445450

446-
/**
447-
* Get JSON representation of the object.
448-
* @return a JSONObject representing the properties of this object
449-
*/
450-
@JsonIgnore
451-
public String getJson(){
452-
ObjectNode json = (ObjectNode) JsonUtil.getInstance().createNode(this);
453-
454-
if (this instanceof URLbasedResource) {
455-
json.set(JSON_KEY_PATH, ((URLbasedResource) this).getPathJson());
456-
} else if (this instanceof FilebasedResource) {
457-
if (this.shouldSerializeToFile()) {
458-
json.set(JSON_KEY_PATH, ((FilebasedResource) this).getPathJson());
459-
} else {
460-
try {
461-
ArrayNode data = JsonUtil.getInstance().createArrayNode();
462-
List<Table> tables = readData();
463-
for (Table t : tables) {
464-
ArrayNode arr = JsonUtil.getInstance().createArrayNode(t.asJson());
465-
arr.elements().forEachRemaining(o->data.add(o));
466-
}
467-
json.set(JSON_KEY_DATA, data);
468-
} catch (Exception ex) {
469-
throw new DataPackageException(ex);
470-
}
471-
}
472-
} else if ((this instanceof AbstractDataResource)) {
473-
if (this.shouldSerializeToFile()) {
474-
//TODO implement storing only the path - and where to get it
475-
} else {
476-
try {
477-
json.set(JSON_KEY_DATA, JsonUtil.getInstance().createNode(this.getRawData()));
478-
} catch (IOException e) {
479-
throw new DataPackageException(e);
480-
}
481-
}
482-
}
483-
484-
String schemaObj = originalReferences.get(JSONBase.JSON_KEY_SCHEMA);
485-
if ((null == schemaObj) && (null != schema)) {
486-
if (null != schema.getReference()) {
487-
schemaObj = JSON_KEY_SCHEMA + "/" + schema.getReference().getFileName();
488-
}
489-
}
490-
if(Objects.nonNull(schemaObj)) {
491-
json.put(JSON_KEY_SCHEMA, schemaObj);
492-
}
493-
494-
String dialectObj = originalReferences.get(JSONBase.JSON_KEY_DIALECT);
495-
if ((null == dialectObj) && (null != dialect)) {
496-
if (null != dialect.getReference()) {
497-
dialectObj = JSON_KEY_DIALECT + "/" + dialect.getReference().getFileName();
498-
}
499-
}
500-
if(Objects.nonNull(dialectObj)) {
501-
json.put(JSON_KEY_DIALECT, dialectObj);
502-
}
503-
return json.toString();
504-
}
505-
506-
507451
public void writeSchema(Path parentFilePath) throws IOException {
508452
String relPath = getPathForWritingSchema();
509453
if (null == originalReferences.get(JSONBase.JSON_KEY_SCHEMA) && Objects.nonNull(relPath)) {
@@ -547,7 +491,7 @@ private static void writeDialect(Path parentFilePath, Dialect dialect) throws IO
547491
}
548492
Files.deleteIfExists(parentFilePath);
549493
try (Writer wr = Files.newBufferedWriter(parentFilePath, StandardCharsets.UTF_8)) {
550-
wr.write(dialect.getJson());
494+
wr.write(dialect.asJson());
551495
}
552496
}
553497

@@ -778,11 +722,21 @@ public void writeData(Path outputDir) throws Exception {
778722
Files.createDirectories(p);
779723
}
780724
Files.deleteIfExists(p);
781-
try (Writer wr = Files.newBufferedWriter(p, StandardCharsets.UTF_8)) {
782-
if (serializationFormat.equals(TableDataSource.Format.FORMAT_CSV.getLabel())) {
783-
t.writeCsv(wr, lDialect.toCsvFormat());
784-
} else if (serializationFormat.equals(TableDataSource.Format.FORMAT_JSON.getLabel())) {
785-
wr.write(t.asJson());
725+
726+
// if the serializationFormat is set, serialize the data to JSON/CSV file
727+
if (null != serializationFormat) {
728+
try (Writer wr = Files.newBufferedWriter(p, StandardCharsets.UTF_8)) {
729+
if (serializationFormat.equals(TableDataSource.Format.FORMAT_CSV.getLabel())) {
730+
t.writeCsv(wr, lDialect.toCsvFormat());
731+
} else if (serializationFormat.equals(TableDataSource.Format.FORMAT_JSON.getLabel())) {
732+
wr.write(t.asJson());
733+
}
734+
}
735+
} else {
736+
// if serializationFormat is not set (probably non-tabular data), serialize the data to a binary file
737+
byte [] data = (byte[])this.getRawData();
738+
try (FileOutputStream fos = new FileOutputStream(p.toFile())){
739+
fos.write(data);
786740
}
787741
}
788742
}

src/main/java/io/frictionlessdata/datapackage/resource/FilebasedResource.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,14 @@ public FilebasedResource(String name, Collection<File> paths, File basePath, Cha
3939
throw new DataPackageValidationException("Invalid Resource. " +
4040
"The path property cannot be null for file-based Resources.");
4141
}
42-
this.setSerializationFormat(sniffFormat(paths));
42+
String format = sniffFormat(paths);
43+
if (format.equals(TableDataSource.Format.FORMAT_JSON.getLabel())
44+
|| format.equals(TableDataSource.Format.FORMAT_CSV.getLabel())) {
45+
this.setSerializationFormat(format);
46+
} else {
47+
super.setFormat(format);
48+
}
49+
4350
this.basePath = basePath;
4451
for (File path : paths) {
4552
/* from the spec: "SECURITY: / (absolute path) and ../ (relative parent path)
@@ -59,15 +66,30 @@ public FilebasedResource(String name, Collection<File> paths, File basePath) {
5966
this(name, paths, basePath, Charset.defaultCharset());
6067
}
6168

69+
70+
@JsonIgnore
71+
public String getSerializationFormat() {
72+
if (null != serializationFormat)
73+
return serializationFormat;
74+
if (null == format) {
75+
return format;
76+
}
77+
return sniffFormat(paths);
78+
}
79+
6280
private static String sniffFormat(Collection<File> paths) {
6381
Set<String> foundFormats = new HashSet<>();
64-
paths.forEach((p) -> {
82+
for (File p : paths) {
6583
if (p.getName().toLowerCase().endsWith(TableDataSource.Format.FORMAT_CSV.getLabel())) {
6684
foundFormats.add(TableDataSource.Format.FORMAT_CSV.getLabel());
6785
} else if (p.getName().toLowerCase().endsWith(TableDataSource.Format.FORMAT_JSON.getLabel())) {
6886
foundFormats.add(TableDataSource.Format.FORMAT_JSON.getLabel());
87+
} else {
88+
// something else -> not a tabular resource
89+
int pos = p.getName().lastIndexOf('.');
90+
return p.getName().substring(pos + 1).toLowerCase();
6991
}
70-
});
92+
}
7193
if (foundFormats.size() > 1) {
7294
throw new DataPackageException("Resources cannot be mixed JSON/CSV");
7395
}

src/main/java/io/frictionlessdata/datapackage/resource/JSONDataResource.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@
1010
public class JSONDataResource extends AbstractDataResource<ArrayNode> {
1111

1212
public JSONDataResource(String name, String json) {
13-
super(name, JsonUtil.getInstance().createArrayNode(json));
13+
this(name, JsonUtil.getInstance().createArrayNode(json));
14+
}
15+
16+
public JSONDataResource(String name, ArrayNode json) {
17+
super(name, json);
1418
super.format = getResourceFormat();
1519
}
1620

0 commit comments

Comments
 (0)