Skip to content

Commit c8815f5

Browse files
authored
Csv decoder (#1290)
* WIP: adding CSV decoder * Adding CSV decoder * Added CSV roundtrip * Fixing from review
1 parent 3c222d8 commit c8815f5

20 files changed

+837
-87
lines changed

acceptance_tests/inputs-format.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# Remove fixture files left behind by a previous test so every test starts
# from a clean working directory. A missing file is not an error.
setUp() {
  local ext
  for ext in yml properties csv tsv xml; do
    rm test*."$ext" 2>/dev/null || true
  done
}
810

@@ -40,6 +42,51 @@ EOM
4042
assertEquals "$expected" "$X"
4143
}
4244

45+
# Parsing a CSV file with -p=csv yields a YAML array of objects, for both the
# eval (e) and eval-all (ea) commands.
testInputCSV() {
  printf '%s\n' 'fruit,yumLevel' 'apple,5' 'banana,4' >test.csv

  expected=$(cat <<EOM
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOM
)

  local mode
  for mode in e ea; do
    X=$(./yq "$mode" -p=csv test.csv)
    assertEquals "$expected" "$X"
  done
}
65+
66+
# Parsing a TSV file with -p=t yields a YAML array of objects, for both the
# eval (e) and eval-all (ea) commands.
testInputTSV() {
  # Write the fixture with printf so the tab separators are spelled out as \t.
  # Literal tabs inside a heredoc are invisible and are easily turned into
  # spaces by editors or formatters, which would make the file a single column.
  printf 'fruit\tyumLevel\napple\t5\nbanana\t4\n' >test.tsv

  read -r -d '' expected << EOM
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOM

  X=$(./yq e -p=t test.tsv)
  assertEquals "$expected" "$X"

  X=$(./yq ea -p=t test.tsv)
  assertEquals "$expected" "$X"
}
86+
87+
88+
89+
4390
testInputXml() {
4491
cat >test.yml <<EOL
4592
<cat legs="4">BiBi</cat>

acceptance_tests/output-format.sh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,48 @@ EOM
102102
assertEquals "$expected" "$X"
103103
}
104104

105+
# Encoding a YAML array of objects as CSV emits a header row followed by one
# row per object, with both the short (-o=c) and long (-o=csv) format names.
testOutputCSV() {
  printf '%s\n' \
    '- fruit: apple' \
    '  yumLevel: 5' \
    '- fruit: banana' \
    '  yumLevel: 4' >test.yml

  # Command substitution drops the trailing newline, matching what $(./yq ...) returns.
  expected=$(printf '%s\n' 'fruit,yumLevel' 'apple,5' 'banana,4')

  X=$(./yq -o=c test.yml)
  assertEquals "$expected" "$X"

  X=$(./yq ea -o=csv test.yml)
  assertEquals "$expected" "$X"
}
125+
126+
# Encoding a YAML array of objects as TSV emits a tab-separated header row
# followed by one row per object, with both the short (-o=t) and long
# (-o=tsv) format names.
testOutputTSV() {
  cat >test.yml <<EOL
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOL

  # Spell the separators out as \t: literal tabs in a heredoc are invisible and
  # are easily replaced by spaces, which would make this assertion fail for a
  # reason nobody can see. Command substitution drops the trailing newline,
  # matching what $(./yq ...) returns.
  expected=$(printf 'fruit\tyumLevel\napple\t5\nbanana\t4\n')

  X=$(./yq -o=t test.yml)
  assertEquals "$expected" "$X"

  X=$(./yq ea -o=tsv test.yml)
  assertEquals "$expected" "$X"
}
146+
105147
testOutputXml() {
106148
cat >test.yml <<EOL
107149
a: {b: {c: ["cat"]}}

cmd/utils.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ func configureDecoder() (yqlib.Decoder, error) {
6868
return yqlib.NewPropertiesDecoder(), nil
6969
case yqlib.JsonInputFormat:
7070
return yqlib.NewJSONDecoder(), nil
71+
case yqlib.CSVObjectInputFormat:
72+
return yqlib.NewCSVObjectDecoder(','), nil
73+
case yqlib.TSVObjectInputFormat:
74+
return yqlib.NewCSVObjectDecoder('\t'), nil
7175
}
7276

7377
return yqlib.NewYamlDecoder(), nil

examples/sample_objects.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name,numberOfCats,likesApples,height
2+
Gary,1,true,168.8
3+
Samantha's Rabbit,2,false,-188.8

pkg/yqlib/csv_test.go

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
package yqlib
2+
3+
import (
4+
"bufio"
5+
"fmt"
6+
"testing"
7+
8+
"github.com/mikefarah/yq/v4/test"
9+
)
10+
11+
// Fixtures shared by the CSV/TSV scenarios below.
//
// The TSV fixtures are written as interpreted string literals with explicit
// \t escapes: a literal tab inside a raw string is indistinguishable from
// spaces when reading the code and is easily corrupted by editors.
const (
	// csvSimple is a CSV document with a header row and two records.
	csvSimple = `name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
`

	// expectedUpdatedSimpleCsv is csvSimple with Gary's numberOfCats set to 3.
	expectedUpdatedSimpleCsv = `name,numberOfCats,likesApples,height
Gary,3,true,168.8
Samantha's Rabbit,2,false,-188.8
`

	// csvSimpleShort keeps two columns of csvSimple under custom header names.
	csvSimpleShort = `Name,Number of Cats
Gary,1
Samantha's Rabbit,2
`

	// tsvSimple is the tab-separated equivalent of csvSimple.
	tsvSimple = "name\tnumberOfCats\tlikesApples\theight\n" +
		"Gary\t1\ttrue\t168.8\n" +
		"Samantha's Rabbit\t2\tfalse\t-188.8\n"

	// expectedYamlFromCSV is the array-of-objects YAML form of csvSimple.
	expectedYamlFromCSV = `- name: Gary
  numberOfCats: 1
  likesApples: true
  height: 168.8
- name: Samantha's Rabbit
  numberOfCats: 2
  likesApples: false
  height: -188.8
`

	// expectedYamlFromCSVMissingData is an array of objects whose entries do
	// not all carry the same keys.
	expectedYamlFromCSVMissingData = `- name: Gary
  numberOfCats: 1
  height: 168.8
- name: Samantha's Rabbit
  height: -188.8
  likesApples: false
`

	// csvSimpleMissingData is the CSV expected from
	// expectedYamlFromCSVMissingData: the second record has a blank
	// numberOfCats and there is no likesApples column.
	csvSimpleMissingData = `name,numberOfCats,height
Gary,1,168.8
Samantha's Rabbit,,-188.8
`

	// csvTestSimpleYaml is an array of arrays with rows of differing length.
	csvTestSimpleYaml = `- [i, like, csv]
- [because, excel, is, cool]`

	// expectedSimpleCsv is csvTestSimpleYaml encoded as CSV.
	expectedSimpleCsv = `i,like,csv
because,excel,is,cool
`

	// tsvTestExpectedSimpleCsv is csvTestSimpleYaml encoded as TSV.
	tsvTestExpectedSimpleCsv = "i\tlike\tcsv\n" +
		"because\texcel\tis\tcool\n"
)
64+
65+
var csvScenarios = []formatScenario{
66+
{
67+
description: "Encode CSV simple",
68+
input: csvTestSimpleYaml,
69+
expected: expectedSimpleCsv,
70+
scenarioType: "encode-csv",
71+
},
72+
{
73+
description: "Encode TSV simple",
74+
input: csvTestSimpleYaml,
75+
expected: tsvTestExpectedSimpleCsv,
76+
scenarioType: "encode-tsv",
77+
},
78+
{
79+
description: "Encode Empty",
80+
skipDoc: true,
81+
input: `[]`,
82+
expected: "",
83+
scenarioType: "encode-csv",
84+
},
85+
{
86+
description: "Comma in value",
87+
skipDoc: true,
88+
input: `["comma, in, value", things]`,
89+
expected: "\"comma, in, value\",things\n",
90+
scenarioType: "encode-csv",
91+
},
92+
{
93+
description: "Encode array of objects to csv",
94+
input: expectedYamlFromCSV,
95+
expected: csvSimple,
96+
scenarioType: "encode-csv",
97+
},
98+
{
99+
description: "Encode array of objects to custom csv format",
100+
subdescription: "Add the header row manually, then the we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.",
101+
input: expectedYamlFromCSV,
102+
expected: csvSimpleShort,
103+
expression: `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
104+
scenarioType: "encode-csv",
105+
},
106+
{
107+
description: "Encode array of objects to csv - missing fields behaviour",
108+
subdescription: "First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank",
109+
input: expectedYamlFromCSVMissingData,
110+
expected: csvSimpleMissingData,
111+
scenarioType: "encode-csv",
112+
},
113+
{
114+
description: "Parse CSV into an array of objects",
115+
subdescription: "First row is assumed to be the header row.",
116+
input: csvSimple,
117+
expected: expectedYamlFromCSV,
118+
scenarioType: "decode-csv-object",
119+
},
120+
{
121+
description: "Parse TSV into an array of objects",
122+
subdescription: "First row is assumed to be the header row.",
123+
input: tsvSimple,
124+
expected: expectedYamlFromCSV,
125+
scenarioType: "decode-tsv-object",
126+
},
127+
{
128+
description: "Round trip",
129+
input: csvSimple,
130+
expected: expectedUpdatedSimpleCsv,
131+
expression: `(.[] | select(.name == "Gary") | .numberOfCats) = 3`,
132+
scenarioType: "roundtrip-csv",
133+
},
134+
}
135+
136+
func testCSVScenario(t *testing.T, s formatScenario) {
137+
switch s.scenarioType {
138+
case "encode-csv":
139+
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(',')), s.description)
140+
case "encode-tsv":
141+
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder('\t')), s.description)
142+
case "decode-csv-object":
143+
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
144+
case "decode-tsv-object":
145+
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
146+
case "roundtrip-csv":
147+
test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
148+
default:
149+
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
150+
}
151+
}
152+
153+
func documentCSVDecodeObjectScenario(t *testing.T, w *bufio.Writer, s formatScenario, formatType string) {
154+
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
155+
156+
if s.subdescription != "" {
157+
writeOrPanic(w, s.subdescription)
158+
writeOrPanic(w, "\n\n")
159+
}
160+
161+
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
162+
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
163+
164+
writeOrPanic(w, "then\n")
165+
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v sample.%v\n```\n", formatType, formatType))
166+
writeOrPanic(w, "will output\n")
167+
168+
separator := ','
169+
if formatType == "tsv" {
170+
separator = '\t'
171+
}
172+
173+
writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n",
174+
processFormatScenario(s, NewCSVObjectDecoder(separator), NewYamlEncoder(s.indent, false, true, true))),
175+
)
176+
}
177+
178+
func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType string) {
179+
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
180+
181+
if s.subdescription != "" {
182+
writeOrPanic(w, s.subdescription)
183+
writeOrPanic(w, "\n\n")
184+
}
185+
186+
writeOrPanic(w, "Given a sample.yml file of:\n")
187+
writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))
188+
189+
writeOrPanic(w, "then\n")
190+
191+
expression := s.expression
192+
193+
if expression != "" {
194+
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v '%v' sample.yml\n```\n", formatType, expression))
195+
} else {
196+
writeOrPanic(w, fmt.Sprintf("```bash\nyq -o=%v sample.yml\n```\n", formatType))
197+
}
198+
writeOrPanic(w, "will output\n")
199+
200+
separator := ','
201+
if formatType == "tsv" {
202+
separator = '\t'
203+
}
204+
205+
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
206+
processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(separator))),
207+
)
208+
}
209+
210+
func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType string) {
211+
writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))
212+
213+
if s.subdescription != "" {
214+
writeOrPanic(w, s.subdescription)
215+
writeOrPanic(w, "\n\n")
216+
}
217+
218+
writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
219+
writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))
220+
221+
writeOrPanic(w, "then\n")
222+
223+
expression := s.expression
224+
225+
if expression != "" {
226+
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v '%v' sample.%v\n```\n", formatType, formatType, expression, formatType))
227+
} else {
228+
writeOrPanic(w, fmt.Sprintf("```bash\nyq -p=%v -o=%v sample.%v\n```\n", formatType, formatType, formatType))
229+
}
230+
writeOrPanic(w, "will output\n")
231+
232+
separator := ','
233+
if formatType == "tsv" {
234+
separator = '\t'
235+
}
236+
237+
writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType,
238+
processFormatScenario(s, NewCSVObjectDecoder(separator), NewCsvEncoder(separator))),
239+
)
240+
}
241+
242+
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
243+
s := i.(formatScenario)
244+
if s.skipDoc {
245+
return
246+
}
247+
switch s.scenarioType {
248+
case "encode-csv":
249+
documentCSVEncodeScenario(w, s, "csv")
250+
case "encode-tsv":
251+
documentCSVEncodeScenario(w, s, "tsv")
252+
case "decode-csv-object":
253+
documentCSVDecodeObjectScenario(t, w, s, "csv")
254+
case "decode-tsv-object":
255+
documentCSVDecodeObjectScenario(t, w, s, "tsv")
256+
case "roundtrip-csv":
257+
documentCSVRoundTripScenario(w, s, "csv")
258+
259+
default:
260+
panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
261+
}
262+
}
263+
264+
func TestCSVScenarios(t *testing.T) {
265+
for _, tt := range csvScenarios {
266+
testCSVScenario(t, tt)
267+
}
268+
genericScenarios := make([]interface{}, len(csvScenarios))
269+
for i, s := range csvScenarios {
270+
genericScenarios[i] = s
271+
}
272+
documentScenarios(t, "usage", "csv-tsv", genericScenarios, documentCSVScenario)
273+
}

0 commit comments

Comments
 (0)