Skip to content

Commit 155f93f

Browse files
Better validation; work on encoding and BOMs
1 parent cd60aea commit 155f93f

File tree

7 files changed

+124
-41
lines changed

7 files changed

+124
-41
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<modelVersion>4.0.0</modelVersion>
44
<groupId>io.frictionlessdata</groupId>
55
<artifactId>tableschema-java</artifactId>
6-
<version>0.6.1-SNAPSHOT</version>
6+
<version>0.6.2-SNAPSHOT</version>
77
<packaging>jar</packaging>
88
<issueManagement>
99
<url>https://github.com/frictionlessdata/tableschema-java/issues</url>

src/main/java/io/frictionlessdata/tableschema/Table.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,8 @@ public void validate() throws TableValidationException, TableSchemaException {
564564
throw new TableValidationException("Found undeclared column: "+col);
565565
}
566566
}
567+
if (null != schema)
568+
schema.validate();
567569
}
568570

569571
/**

src/main/java/io/frictionlessdata/tableschema/schema/Schema.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ public int hashCode() {
411411
* @throws ValidationException If validation fails and validation is strict
412412
*/
413413
@JsonIgnore
414-
void validate() throws ValidationException{
414+
public void validate() throws ValidationException{
415415
String json = this.getJson();
416416
Set<ValidationMessage> messages = tableFormalSchemaValidator.validate(json);
417417
if (!messages.isEmpty()) {

src/main/java/io/frictionlessdata/tableschema/tabledatasource/TableDataSource.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,30 @@ public interface TableDataSource {
6666
* @return DataSource created from input String
6767
*/
6868
static TableDataSource fromSource(String input) {
69+
String cleanedInput = input;
70+
try {
71+
byte[] bytes = new byte[input.length()];
72+
input.getBytes(0, input.length(), bytes, 0);
73+
ByteOrderMarkStrippingInputStream is
74+
= new ByteOrderMarkStrippingInputStream(new ByteArrayInputStream(bytes));
75+
is.skipBOM();
76+
77+
InputStreamReader rdr = new InputStreamReader(is);
78+
BufferedReader bfr = new BufferedReader(rdr);
79+
cleanedInput = bfr.lines().collect(Collectors.joining("\n"));
80+
is.close();
81+
rdr.close();
82+
bfr.close();
83+
} catch (Exception ex) {
84+
throw new TableIOException(ex);
85+
}
6986
try {
7087
// JSON array generation. If an exception is thrown -> probably CSV data
71-
ArrayNode json = JsonUtil.getInstance().createArrayNode(input);
72-
return new JsonArrayTableDataSource(json);
88+
ArrayNode json = JsonUtil.getInstance().createArrayNode(cleanedInput);
89+
return new JsonArrayTableDataSource(input);
7390
} catch (Exception ex) {
7491
// JSON parsing failed, treat it as a CSV
75-
return new CsvTableDataSource(input);
92+
return new CsvTableDataSource(cleanedInput);
7693
}
7794
}
7895

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package io.frictionlessdata.tableschema.table_tests;

import io.frictionlessdata.tableschema.Table;
import io.frictionlessdata.tableschema.exception.TableIOException;
import io.frictionlessdata.tableschema.inputstream.ByteOrderMarkStrippingInputStream;
import io.frictionlessdata.tableschema.schema.Schema;
import org.apache.commons.csv.CSVFormat;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.stream.Collectors;

import static io.frictionlessdata.tableschema.TestHelper.getTestDataDirectory;

/**
 * Tests for creating Tables from files and Strings that start with a
 * byte order mark (BOM).
 */
public class TableBOMTests {
    // Tab-delimited format with "\n" record separator; header taken from the data.
    private final CSVFormat csvFormat = CSVFormat
            .TDF
            .builder()
            .setRecordSeparator("\n")
            .setHeader(new String[0])
            .build();


    @Test
    @DisplayName("Create a Table from CSV File with BOM with Schema from Stream and with default CSVFormat")
    public void testReadFileWithBOMAndSchema() throws Exception {
        File testDataDir = getTestDataDirectory();
        // get path of test CSV file
        File file = new File("data/simple_data_bom2.tsv");
        Table table = Table.fromSource(file, testDataDir);
        table.setCsvFormat(csvFormat);
        File f = new File(getTestDataDirectory(), "schema/simple_data_schema.json");
        Schema schema;
        try (FileInputStream fis = new FileInputStream(f)) {
            schema = Schema.fromJson(fis, false);
        }
        // must not throw an exception
        table.setSchema(schema);
    }

    @Test
    @DisplayName("Create a Table from CSV File with BOM without a Schema from Stream " +
            "and with custom CSVFormat")
    public void testReadFileWithBOM() throws Exception {
        File testDataDir = getTestDataDirectory();
        // get path of test CSV file
        File file = new File("data/data_bom.tsv");
        Table table = Table.fromSource(file, testDataDir);

        table.setCsvFormat(csvFormat);
        Assertions.assertEquals(3, table.read().size());
        // must not throw an exception
        table.validate();
    }

    @Test
    @DisplayName("Create a Table from CSV String data with BOM with Schema from Stream and with tab-delimited CSVFormat")
    @Disabled
    public void testReadStringWithBOMAndSchema() throws Exception {
        // Read the raw bytes of the BOM-prefixed test file. (Equivalent to the old
        // byte-at-a-time read plus the deprecated String.getBytes(int,int,byte[],int)
        // round-trip, without the char detour.)
        File file = new File(getTestDataDirectory(), "data/data_bom.tsv");
        byte[] bytes = Files.readAllBytes(file.toPath());

        // NOTE(review): assumes the test file is UTF-16 encoded — confirm against the
        // fixture; otherwise use bims.getCharset() as detected from the BOM.
        Charset charset = StandardCharsets.UTF_16;
        String content;

        try (ByteOrderMarkStrippingInputStream bims = new ByteOrderMarkStrippingInputStream(new ByteArrayInputStream(bytes));
             InputStreamReader isr = new InputStreamReader(bims.skipBOM(), charset);
             BufferedReader rdr = new BufferedReader(isr)) {
            content = rdr.lines().collect(Collectors.joining("\n"));
        } catch (IOException ex) {
            throw new TableIOException(ex);
        }

        // Bug fix: feed the BOM-stripped content into the table. The previous code
        // passed the raw, un-stripped file content and silently discarded `content`.
        Table table = Table.fromSource(content, null, csvFormat);
        File f = new File(getTestDataDirectory(), "schema/simple_data_schema.json");
        Schema schema;
        try (FileInputStream fis = new FileInputStream(f)) {
            schema = Schema.fromJson(fis, false);
        }
        // must not throw an exception
        table.setSchema(schema);
    }

}

src/test/java/io/frictionlessdata/tableschema/table_tests/TableCreationTest.java

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -321,42 +321,6 @@ public void testReadFromValidFileWithMismatchingValidSchemaViaStream() throws Ex
321321
assertThrows(TableValidationException.class, table::validate);
322322
}
323323

324-
@Test
325-
@DisplayName("Create a Table from CSV String data with BOM with Schema from Stream and with default CSVFormat")
326-
public void testReadFileWithBOMAndSchema() throws Exception{
327-
File testDataDir = getTestDataDirectory();
328-
// get path of test CSV file
329-
File file = new File("data/simple_data_bom2.tsv");
330-
Table table = Table.fromSource(file, testDataDir);
331-
table.setCsvFormat(CSVFormat.TDF.withRecordSeparator("\n").withHeader());
332-
File f = new File(getTestDataDirectory(), "schema/simple_data_schema.json");
333-
Schema schema = null;
334-
try (FileInputStream fis = new FileInputStream(f)) {
335-
schema = Schema.fromJson (fis, false);
336-
}
337-
// must not throw an exception
338-
table.setSchema(schema);
339-
}
340-
341-
@Test
342-
@DisplayName("Create a Table from CSV String data with BOM without a Schema from Stream " +
343-
"and with custom CSVFormat")
344-
public void testReadFileWithBOM() throws Exception{
345-
File testDataDir = getTestDataDirectory();
346-
// get path of test CSV file
347-
File file = new File("data/simple_data_bom2.tsv");
348-
Table table = Table.fromSource(file, testDataDir);
349-
CSVFormat csvFormat = CSVFormat
350-
.TDF
351-
.builder()
352-
.setRecordSeparator("\n")
353-
.setHeader(new String[0])
354-
.build();
355-
table.setCsvFormat(csvFormat);
356-
Assertions.assertEquals(3, table.read().size());
357-
// must not throw an exception
358-
table.validate();
359-
}
360324

361325
// Ensure that a JSON array file with different ordering of properties between the
362326
// object records can be read into a consistent String array with the help of
74 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)