Skip to content

Commit 944e819

Browse files
committed
Merge #486 from remote-tracking branch 'origin/483-addCsvEncoder'
2 parents 59ea7e4 + 67b4412 commit 944e819

File tree

3 files changed

+412
-0
lines changed

3 files changed

+412
-0
lines changed
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
/*
2+
* Copyright 2018-2023 Deutsche Nationalbibliothek et al
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.metafacture.csv;
18+
19+
import org.metafacture.framework.FluxCommand;
20+
import org.metafacture.framework.MetafactureException;
21+
import org.metafacture.framework.ObjectReceiver;
22+
import org.metafacture.framework.StreamReceiver;
23+
import org.metafacture.framework.annotations.Description;
24+
import org.metafacture.framework.annotations.In;
25+
import org.metafacture.framework.annotations.Out;
26+
import org.metafacture.framework.helpers.DefaultStreamPipe;
27+
28+
import com.opencsv.CSVWriter;
29+
30+
import java.io.IOException;
31+
import java.io.StringWriter;
32+
import java.util.ArrayList;
33+
import java.util.List;
34+
35+
/**
36+
* A csv encoder that converts a record into a csv line (Default separator: {@value #DEFAULT_SEP}).
37+
*
38+
* <p>
39+
* Each record represents a row. Each literal value represents a column value.
40+
* </P>
41+
*
42+
* @author eberhardtj ([email protected])
43+
*/
44+
@Description("Encodes each value in a record as a csv row.")
45+
@In(StreamReceiver.class)
46+
@Out(String.class)
47+
@FluxCommand("encode-csv")
48+
public class CsvEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
49+
public static final char DEFAULT_SEP = CSVWriter.DEFAULT_SEPARATOR;
50+
private CSVWriter csvWriter;
51+
private StringWriter writer;
52+
private List<String> rowItems = new ArrayList<>();
53+
private boolean isFirstRecord = true;
54+
private List<String> header = new ArrayList<>();
55+
private char separator = DEFAULT_SEP;
56+
private boolean noQuotes;
57+
private boolean includeHeader;
58+
private boolean includeRecordId;
59+
60+
/**
61+
* Creates an instance of {@link CsvEncoder} with a given separator.
62+
*
63+
* @param separator to separate columns
64+
*/
65+
public CsvEncoder(final String separator) {
66+
this.separator = separator.charAt(0);
67+
}
68+
69+
/**
70+
* Creates an instance of {@link CsvEncoder} with a given separator.
71+
*
72+
* @param separator to separate columns
73+
*/
74+
public CsvEncoder(final char separator) {
75+
this.separator = separator;
76+
}
77+
78+
/**
79+
* Creates an instance of {@link CsvEncoder}. The default separator is
80+
* {@value #DEFAULT_SEP}.
81+
*/
82+
public CsvEncoder() {
83+
}
84+
85+
/**
86+
* Start each line with the record ID.
87+
* Default is to not start each line with the record ID.
88+
*
89+
* @param includeRecordId true if the first column should consist of the record's ID
90+
*/
91+
public void setIncludeRecordId(final boolean includeRecordId) {
92+
this.includeRecordId = includeRecordId;
93+
}
94+
95+
/**
96+
* Add first record as a column description header.
97+
* Default is to not add a column description.
98+
*
99+
* @param includeHeader true if the first record should act as a CSV header, otherwise false
100+
*/
101+
public void setIncludeHeader(final boolean includeHeader) {
102+
this.includeHeader = includeHeader;
103+
}
104+
105+
/**
106+
* Set the character to separate the columns.
107+
* The default is {@value #DEFAULT_SEP}.
108+
*
109+
* @param separator set the character which separates the columns
110+
*/
111+
public void setSeparator(final String separator) {
112+
if (separator.length() > 1) {
113+
throw new MetafactureException("Separator needs to be a single character.");
114+
}
115+
this.separator = separator.charAt(0);
116+
}
117+
118+
/**
119+
* Set the character to separate the columns.
120+
* The default is {@value #DEFAULT_SEP}.
121+
*
122+
* @param separator set the character which separates the columns
123+
*/
124+
public void setSeparator(final char separator) {
125+
this.separator = separator;
126+
}
127+
128+
/**
129+
* Set if values should be not quoted by '"'.
130+
* The default is to quote values.
131+
*
132+
* @param noQuotes true if no quotes should be used. Default is false.
133+
*/
134+
public void setNoQuotes(final boolean noQuotes) {
135+
this.noQuotes = noQuotes;
136+
}
137+
138+
private void initialize() {
139+
writer = new StringWriter();
140+
final String emptyLineEnd = "";
141+
csvWriter = new CSVWriter(writer, separator,
142+
noQuotes ? CSVWriter.NO_QUOTE_CHARACTER : CSVWriter.DEFAULT_QUOTE_CHARACTER,
143+
CSVWriter.DEFAULT_ESCAPE_CHARACTER, emptyLineEnd);
144+
}
145+
146+
private String[] arrayOf(final List<String> list) {
147+
final int length = list.size();
148+
return list.toArray(new String[length]);
149+
}
150+
151+
private void resetCaches() {
152+
this.rowItems = new ArrayList<>();
153+
}
154+
155+
private void writeRow(final List<String> rowItemsArray) {
156+
final String[] row = arrayOf(rowItemsArray);
157+
csvWriter.writeNext(row);
158+
final String line = writer.toString();
159+
getReceiver().process(line);
160+
writer.getBuffer().setLength(0);
161+
}
162+
163+
@Override
164+
public void startRecord(final String identifier) {
165+
if (isFirstRecord) {
166+
initialize();
167+
if (includeRecordId) {
168+
header.add("record id");
169+
}
170+
}
171+
172+
rowItems = new ArrayList<>();
173+
174+
if (includeRecordId) {
175+
rowItems.add(identifier);
176+
}
177+
}
178+
179+
@Override
180+
public void endRecord() {
181+
if (isFirstRecord) {
182+
if (includeHeader) {
183+
writeRow(header);
184+
header.clear();
185+
}
186+
isFirstRecord = false;
187+
}
188+
189+
writeRow(rowItems);
190+
resetCaches();
191+
}
192+
193+
@Override
194+
public void literal(final String name, final String value) {
195+
if (isFirstRecord) {
196+
header.add(name);
197+
}
198+
rowItems.add(value);
199+
}
200+
201+
@Override
202+
public void onCloseStream() {
203+
try {
204+
csvWriter.close();
205+
}
206+
catch (final IOException e) {
207+
throw new MetafactureException(e);
208+
}
209+
}
210+
211+
@Override
212+
public void onResetStream() {
213+
this.includeRecordId = false;
214+
this.includeHeader = false;
215+
this.header = new ArrayList<>();
216+
this.isFirstRecord = true;
217+
this.rowItems = new ArrayList<>();
218+
}
219+
220+
}

metafacture-csv/src/main/resources/flux-commands.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
# limitations under the License.
1515
#
1616
decode-csv org.metafacture.csv.CsvDecoder
17+
encode-csv org.metafacture.csv.CsvEncoder

0 commit comments

Comments
 (0)