Skip to content

Commit bbb9a02

Browse files
csv files are almost completely parsing correctly
1 parent d50df94 commit bbb9a02

File tree

3 files changed

+44
-23
lines changed

3 files changed

+44
-23
lines changed

src/main/java/com/datastax/cdm/CassandraDatasetManager.java

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import com.google.common.util.concurrent.ListenableFuture;
88
import org.apache.commons.cli.*;
99
import org.apache.commons.csv.CSVFormat;
10+
import org.apache.commons.csv.CSVParser;
1011
import org.apache.commons.csv.CSVRecord;
1112
import org.apache.commons.io.FileUtils;
1213
import org.eclipse.jgit.api.Git;
@@ -22,14 +23,19 @@
2223

2324
import java.io.*;
2425
import java.net.URL;
26+
import java.nio.charset.Charset;
2527
import java.util.*;
2628

2729
/**
2830
* Created by jhaddad on 6/29/16.
2931
*/
3032

33+
3134
public class CassandraDatasetManager {
3235

36+
/**
 * Checked exception signalling that a CSV record cannot be turned into a CQL
 * statement. generateCQL throws it when the record's value count differs from
 * the number of fields; the per-record loop in install catches it, prints the
 * stack trace and the offending record, and carries on with the next record.
 *
 * NOTE(review): declared as a non-static inner class, so each instance is tied
 * to an enclosing CassandraDatasetManager instance -- consider whether it
 * should be static. It also carries no message; confirm that is intended.
 */
public class InvalidArgsException extends Exception {
37+
38+
}
3339
private static final String YAML_URI = "https://raw.githubusercontent.com/riptano/cdm-java/master/datasets.yaml";
3440
private Map<String, Dataset> datasets;
3541
private Session session;
@@ -49,11 +55,9 @@ public static void main(String[] args) throws IOException, ParseException, Inter
4955

5056
// check for the .cdm directory
5157
String home_dir = System.getProperty("user.home");
52-
// System.out.println(home_dir);
5358
String cdm_path = home_dir + "/.cdm";
5459

5560
File f = new File(cdm_path);
56-
// System.out.println(f);
5761

5862
f.mkdir();
5963

@@ -64,8 +68,6 @@ public static void main(String[] args) throws IOException, ParseException, Inter
6468
FileUtils.copyURLToFile(y, yaml);
6569
}
6670
// read in the YAML dataset list
67-
// System.out.println("Loading Configuration YAML");
68-
6971
ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
7072

7173
// why extra work? Java Type Erasure will prevent type detection otherwise
@@ -74,7 +76,6 @@ public static void main(String[] args) throws IOException, ParseException, Inter
7476
// debug: show all datasets no matter what
7577
CassandraDatasetManager cdm = new CassandraDatasetManager(data);
7678

77-
7879
// parse the CLI options
7980
Options options = new Options();
8081

@@ -281,19 +282,23 @@ void install(String name) throws IOException, InterruptedException, GitAPIExcept
281282
List<ResultSetFuture> futures = new ArrayList<>();
282283
for(CSVRecord record: records) {
283284
// generate a CQL statement
284-
String cql = generateCQL(table,
285-
record,
286-
fieldlist
287-
);
288-
289-
ResultSetFuture future = session.executeAsync(cql);
290-
futures.add(future);
291-
totalComplete++;
292-
if(totalComplete % 100 == 0) {
293-
futures.forEach(ResultSetFuture::getUninterruptibly);
294-
futures.clear();
285+
String cql = null;
286+
try {
287+
cql = generateCQL(table, record, fieldlist);
288+
289+
ResultSetFuture future = session.executeAsync(cql);
290+
futures.add(future);
291+
totalComplete++;
292+
if(totalComplete % 100 == 0) {
293+
futures.forEach(ResultSetFuture::getUninterruptibly);
294+
futures.clear();
295+
}
296+
System.out.print("Complete: " + totalComplete + "\r");
297+
298+
} catch (InvalidArgsException e) {
299+
e.printStackTrace();
300+
System.out.println(record);
295301
}
296-
System.out.print("Complete: " + totalComplete + "\r");
297302

298303
}
299304
futures.forEach(ResultSetFuture::getUninterruptibly);
@@ -305,14 +310,15 @@ void install(String name) throws IOException, InterruptedException, GitAPIExcept
305310
System.out.println("Loading data");
306311
}
307312

308-
Iterable<CSVRecord> openCSV(String path) throws IOException {
309-
Reader in = new FileReader(path);
310-
return CSVFormat.RFC4180.parse(in);
313+
/**
 * Opens the CSV file at {@code path} and returns a parser over its records.
 *
 * The file is decoded as UTF-8 and parsed with the RFC4180 format whose quote
 * character has been set to {@code null}.
 *
 * NOTE(review): per the Commons CSV documentation a null quote character
 * disables quoting, so quote marks presumably stay in the field text and a
 * comma inside a quoted field would split it -- confirm this is the intended
 * trade-off for the datasets being loaded.
 * NOTE(review): the parser is not closed here; presumably the caller is
 * responsible for closing it -- verify against the call sites.
 *
 * @param path location of the CSV file to read
 * @return a parser over the file's records
 * @throws IOException declared by this method; presumably propagated from
 *         CSVParser.parse when the file cannot be read
 */
CSVParser openCSV(String path) throws IOException {
314+
File f = new File(path);
315+
return CSVParser.parse(f, Charset.forName("UTF-8"),
316+
CSVFormat.RFC4180.withQuote(null));
311317
}
312318

313319
String generateCQL(String table,
314320
CSVRecord record,
315-
ArrayList<Field> fields) {
321+
ArrayList<Field> fields) throws InvalidArgsException {
316322

317323
HashSet needs_quotes = new HashSet();
318324

@@ -330,6 +336,8 @@ String generateCQL(String table,
330336
query.append(sjfields.toString());
331337

332338
query.append(") VALUES (");
339+
if (record.size() != fields.size())
340+
throw new InvalidArgsException();
333341

334342
for(int i = 0; i < record.size(); i++) {
335343
String v = record.get(i);

src/main/sh/build_runnable.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#!/bin/sh
22

33
# Build the self-contained bin/cdm launcher: clean old artifacts, package with
# Maven, then prepend the launcher script to the fat jar.
# Remove any previously built *jar-with-dependencies.jar from target/; the
# final cat step uses the same glob.
rm -rf target/*jar-with-dependencies.jar
4+
# Remove the previously generated launcher (-f: no error if it is absent).
rm -f bin/cdm
5+
46
# Run the Maven package phase; -e prints full stack traces on build errors.
mvn -e package
57

6-
mkdir bin
8+
# -p: create bin/ only if missing, so a re-run does not fail on an existing dir.
mkdir -p bin
79

810
# Concatenate execute.sh and the fat jar into bin/cdm, then mark it executable
# (chmod runs only if the cat succeeded).
cat src/main/sh/execute.sh target/*jar-with-dependencies.jar > bin/cdm && chmod 755 bin/cdm

test/java/com/datastax/cdm/CassandraDatasetManagerTest.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import static org.hamcrest.CoreMatchers.containsString;
55
import static org.junit.Assert.assertThat;
66

7+
import org.apache.commons.csv.CSVParser;
78
import org.apache.commons.csv.CSVRecord;
89
import org.junit.Test;
910

@@ -17,7 +18,7 @@
1718
public class CassandraDatasetManagerTest {
1819

1920
@Test
20-
public void testCQLStatementGeneration() throws IOException {
21+
public void testCQLStatementGeneration() throws IOException, CassandraDatasetManager.InvalidArgsException {
2122
CassandraDatasetManager c = new CassandraDatasetManager();
2223
Iterable<CSVRecord> records = c.openCSV("data/alltypes.csv");
2324
CSVRecord r = records.iterator().next();
@@ -39,4 +40,14 @@ public void testCQLStatementGeneration() throws IOException {
3940

4041
}
4142

43+
/**
 * Smoke test for data/users2.csv: opens the file through openCSV and iterates
 * over every record. There are no assertions, so the test fails only if an
 * exception is thrown while constructing the manager, opening the file, or
 * iterating.
 *
 * NOTE(review): the CSVParser is never closed here, and nothing checks the
 * parsed field values -- consider asserting on record sizes or contents.
 */
@Test
44+
public void testCSVWeirdQuotes() throws IOException {
45+
CassandraDatasetManager c = new CassandraDatasetManager();
46+
CSVParser records = c.openCSV("data/users2.csv");
47+
for (CSVRecord record : records) {
48+
49+
}
50+
51+
}
52+
4253
}

0 commit comments

Comments
 (0)