Skip to content

Commit a5549dd

Browse files
committed
Add CsvEncoder (#483)
Copied from https://github.com/metafacture/metafacture-csv-plugin. Original author: eberhardtj ([email protected]).
1 parent 59ea7e4 commit a5549dd

File tree

3 files changed

+407
-0
lines changed

3 files changed

+407
-0
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
/*
2+
* Copyright 2018-2023 Deutsche Nationalbibliothek et al
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.metafacture.csv;
18+
19+
import org.metafacture.framework.FluxCommand;
20+
import org.metafacture.framework.MetafactureException;
21+
import org.metafacture.framework.ObjectReceiver;
22+
import org.metafacture.framework.StreamReceiver;
23+
import org.metafacture.framework.annotations.Description;
24+
import org.metafacture.framework.annotations.In;
25+
import org.metafacture.framework.annotations.Out;
26+
import org.metafacture.framework.helpers.DefaultStreamPipe;
27+
28+
import com.opencsv.CSVWriter;
29+
30+
import java.io.IOException;
31+
import java.io.StringWriter;
32+
import java.util.ArrayList;
33+
import java.util.List;
34+
import java.util.stream.Collectors;
35+
36+
/**
37+
* A csv encoder that converts a record into a csv line (Default separator: {@value #DEFAULT_SEP}).
38+
*
39+
* <p>
40+
* Each record represents a row. Each literal value represents a column value.
41+
* </P>
42+
*
43+
* @author eberhardtj ([email protected])
44+
*/
45+
@Description("Encodes each value in a record as a csv row.")
46+
@In(StreamReceiver.class)
47+
@Out(String.class)
48+
@FluxCommand("encode-csv")
49+
public class CsvEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {
50+
public static final char DEFAULT_SEP = CSVWriter.DEFAULT_SEPARATOR;
51+
private CSVWriter csvWriter;
52+
private StringWriter writer;
53+
private List<String> rowItems = new ArrayList<>();
54+
private boolean isFirstRecord = true;
55+
private List<String> header = new ArrayList<>();
56+
private char separator = DEFAULT_SEP;
57+
private boolean noQuotes;
58+
private boolean includeHeader;
59+
private boolean includeRecordId;
60+
61+
/**
62+
* Creates an instance of {@link CsvEncoder} with a given separator.
63+
*
64+
* @param separator to separate columns
65+
*/
66+
public CsvEncoder(final String separator) {
67+
this.separator = separator.charAt(0);
68+
}
69+
70+
/**
71+
* Creates an instance of {@link CsvEncoder} with a given separator.
72+
*
73+
* @param separator to separate columns
74+
*/
75+
public CsvEncoder(final char separator) {
76+
this.separator = separator;
77+
}
78+
79+
/**
80+
* Creates an instance of {@link CsvEncoder}. The default separator is
81+
* {@value #DEFAULT_SEP}.
82+
*/
83+
public CsvEncoder() {
84+
}
85+
86+
/**
87+
* Start each line with the record ID.
88+
* Default is to not start each line with the record ID.
89+
*
90+
* @param includeRecordId true if the first column should consist of the record's ID
91+
*/
92+
public void setIncludeRecordId(final boolean includeRecordId) {
93+
this.includeRecordId = includeRecordId;
94+
}
95+
96+
/**
97+
* Add first record as a column description header.
98+
* Default is to not add a column description.
99+
*
100+
* @param includeHeader true if the first record should act as a CSV header, otherwise false
101+
*/
102+
public void setIncludeHeader(final boolean includeHeader) {
103+
this.includeHeader = includeHeader;
104+
}
105+
106+
/**
107+
* Add a character to separate the columns.
108+
* The default is {@value #DEFAULT_SEP}.
109+
*
110+
* @param separator set the character which separates the columns
111+
*/
112+
public void setSeparator(final String separator) {
113+
if (separator.length() > 1) {
114+
throw new MetafactureException("Separator needs to be a single character.");
115+
}
116+
this.separator = separator.charAt(0);
117+
}
118+
119+
/**
120+
* Add a character to separate the columns.
121+
* The default is {@value #DEFAULT_SEP}.
122+
*
123+
* @param separator set the character which separates the columns
124+
*/
125+
public void setSeparator(final char separator) {
126+
this.separator = separator;
127+
}
128+
129+
/**
130+
* Set if values should be not quoted by '"'.
131+
* The default is to quote values.
132+
*
133+
* @param noQuotes true if no quotes should be used. Default is false.
134+
*/
135+
public void setNoQuotes(final boolean noQuotes) {
136+
this.noQuotes = noQuotes;
137+
}
138+
139+
private void initialize() {
140+
writer = new StringWriter();
141+
final String emptyLineEnd = "";
142+
csvWriter = new CSVWriter(writer,
143+
separator,
144+
noQuotes ? CSVWriter.NO_QUOTE_CHARACTER : CSVWriter.DEFAULT_QUOTE_CHARACTER,
145+
CSVWriter.DEFAULT_ESCAPE_CHARACTER,
146+
emptyLineEnd);
147+
}
148+
149+
private String[] arrayOf(final List<String> list) {
150+
final int length = list.size();
151+
return list.toArray(new String[length]);
152+
}
153+
154+
private void resetCaches() {
155+
this.rowItems = new ArrayList<>();
156+
}
157+
158+
private void writeRow(final List<String> rowItemsArray) {
159+
final String[] row = arrayOf(rowItemsArray);
160+
csvWriter.writeNext(row);
161+
final String line = writer.toString();
162+
getReceiver().process(line);
163+
writer.getBuffer().setLength(0);
164+
}
165+
166+
@Override
167+
public void startRecord(final String identifier) {
168+
if (isFirstRecord) {
169+
initialize();
170+
if (includeRecordId) {
171+
header.add("record id");
172+
}
173+
}
174+
175+
rowItems = new ArrayList<>();
176+
177+
if (includeRecordId) {
178+
rowItems.add(identifier);
179+
}
180+
}
181+
182+
@Override
183+
public void endRecord() {
184+
if (isFirstRecord) {
185+
if (includeHeader) {
186+
final List<String> uniqueHeader = header.stream().distinct().collect(Collectors.toList());
187+
writeRow(uniqueHeader);
188+
header.clear();
189+
}
190+
isFirstRecord = false;
191+
}
192+
193+
writeRow(rowItems);
194+
195+
resetCaches();
196+
}
197+
198+
@Override
199+
public void literal(final String name, final String value) {
200+
if (isFirstRecord) {
201+
header.add(name);
202+
}
203+
rowItems.add(value);
204+
}
205+
206+
@Override
207+
public void onCloseStream() {
208+
try {
209+
csvWriter.close();
210+
}
211+
catch (final IOException e) {
212+
throw new MetafactureException(e);
213+
}
214+
}
215+
216+
@Override
217+
public void onResetStream() {
218+
this.includeRecordId = false;
219+
this.includeHeader = false;
220+
this.header = new ArrayList<>();
221+
222+
this.isFirstRecord = true;
223+
this.rowItems = new ArrayList<>();
224+
}
225+
226+
}

metafacture-csv/src/main/resources/flux-commands.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
# limitations under the License.
1515
#
1616
decode-csv org.metafacture.csv.CsvDecoder
17+
encode-csv org.metafacture.csv.CsvEncoder

0 commit comments

Comments
 (0)