Skip to content

Commit 38c8872

Browse files
author
R. S. Doiel
committed
prep for v1.3.3, Added option, '-for-dataset' to csv2jsonl. This renders JSON lines output suitable for loading into a dataset collection.
1 parent 24eb061 commit 38c8872

File tree

103 files changed

+166
-139
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+166
-139
lines changed

CITATION.cff

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ maintainers:
1515
orcid: ""
1616

1717
repository-code: "https://github.com/caltechlibrary/datatools"
18-
version: 1.3.2
18+
version: 1.3.3
1919
license-url: "https://data.caltech.edu/license"
2020
keywords: [ "csv", "excel", "sql", "json", "yaml", "xlsx", "golang", "bash" ]
21-
date-released: 2025-05-13
21+
date-released: 2025-05-14

about.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
<section>
2727
<h1 id="about-this-software">About this software</h1>
28-
<h2 id="datatools-1.3.2">datatools 1.3.2</h2>
28+
<h2 id="datatools-1.3.3">datatools 1.3.3</h2>
2929
<h3 id="authors">Authors</h3>
3030
<ul>
3131
<li>R. S. Doiel</li>

about.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@ authors:
1111
orcid: ""
1212

1313
repository-code: "https://github.com/caltechlibrary/datatools"
14-
version: 1.3.2
14+
version: 1.3.3
1515
license-url: "https://data.caltech.edu/license"
1616
keywords: [ "csv", "excel", "sql", "json", "yaml", "xlsx", "golang", "bash" ]
17-
date-released: 2025-05-13
17+
date-released: 2025-05-14
1818
---
1919

2020
About this software
2121
===================
2222

23-
## datatools 1.3.2
23+
## datatools 1.3.3
2424

2525
### Authors
2626

cmd/csv2jsonl/csv2jsonl.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ is one object per line. See https://jsonlines.org.
9595
-use-lazy-quotes
9696
: use lazy quotes for for CSV input
9797
98+
-for-dataset COLUMN_NO
99+
: if COLUMN_NO is greater than -1 then, generate a dataset load compatible version of the CSV file
100+
using COLUMN_NO as key.
98101
99102
# EXAMPLES
100103
@@ -130,6 +133,7 @@ Convert data1.csv to JSON line (one object line per blob)
130133
trimLeadingSpace bool
131134
fieldsPerRecord int
132135
reuseRecord bool
136+
forDataset int
133137
)
134138

135139
func main() {
@@ -159,6 +163,7 @@ func main() {
159163
flag.BoolVar(&trimLeadingSpace, "trim-leading-space", false, "trim leading space in fields for CSV input")
160164
flag.BoolVar(&reuseRecord, "reuse-record", false, "reuse the backing array")
161165
flag.IntVar(&fieldsPerRecord, "fields-per-record", 0, "Set the number of fields expected in the CSV read, -1 to turn off")
166+
flag.IntVar(&forDataset, "for-dataset", -1, "generate a dataset compatible JSON lines output using column number as key")
162167

163168
// Parse environment and options
164169
flag.Parse()
@@ -244,12 +249,16 @@ func main() {
244249

245250
// Pad the fieldnames if necessary
246251
object = map[string]interface{}{}
252+
key := ""
247253
for col, val := range row {
248254
if col < len(fieldNames) {
249255
object[fieldNames[col]] = val
250256
} else {
251257
object[fmt.Sprintf("col_%d", col)] = val
252258
}
259+
if (col == forDataset) {
260+
key = fmt.Sprintf("%s", val);
261+
}
253262
}
254263
var src []byte
255264
src, err = datatools.JSONMarshal(object)
@@ -259,7 +268,17 @@ func main() {
259268
}
260269
hasError = true
261270
}
262-
fmt.Fprintf(out, "%s%s", src, eol)
271+
if (forDataset >= 0) {
272+
if (key == "") {
273+
if !quiet {
274+
fmt.Fprintf(eout, "error row, mising key value for column %d, row %d\n", forDataset, rowNo)
275+
}
276+
}
277+
fmt.Fprintf(out, `{%q:%q,%q:%s}%s`, "key", key, "object", src, eol)
278+
} else {
279+
fmt.Fprintf(out, "%s%s", src, eol)
280+
}
281+
rowNo++
263282
}
264283
if hasError == true {
265284
os.Exit(1)

codemeta.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929
}
3030
],
3131
"dateCreated": "2017-02-06",
32-
"dateModified": "2025-05-13",
33-
"datePublished": "2025-05-13",
32+
"dateModified": "2025-05-14",
33+
"datePublished": "2025-05-14",
3434
"description": "A set of command line tools for working with CSV, Excel Workbooks, JSON and structured text documents.",
3535
"funder": {
3636
"@id": "https://ror.org/5dxps055",
@@ -61,9 +61,9 @@
6161
"Golang >= 1.23.5",
6262
"Pandoc >= 3.1"
6363
],
64-
"version": "1.3.2",
64+
"version": "1.3.3",
6565
"developmentStatus": "active",
6666
"issueTracker": "https://github.com/caltechlibrary/datatools/issues",
6767
"downloadUrl": "https://github.com/caltechlibrary/datatools/releases/",
68-
"releaseNotes": "Added cli, csv2jsonl, that will convert a CSV document into a JSON lines list of objects."
68+
"releaseNotes": "Added option, `-for-dataset` to csv2jsonl. This renders JSON lines output suitable for loading into a dataset collection."
6969
}

codemeta2cff.1.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
%codemeta2cff(1) user manual | version 1.3.2 1ee0728
1+
%codemeta2cff(1) user manual | version 1.3.3 24eb061
22
% R. S. Doiel
3-
% 2025-05-13
3+
% 2025-05-14
44

55
# NAME
66

csv2json.1.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
%csv2json(1) user manual | version 1.3.2 1ee0728
1+
%csv2json(1) user manual | version 1.3.3 24eb061
22
% R. S. Doiel
3-
% 2025-05-13
3+
% 2025-05-14
44

55
# NAME
66

csv2jsonl.1.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ <h1 id="options">OPTIONS</h1>
8989
<dd>
9090
use lazy quotes for CSV input
9191
</dd>
92+
<dt>-for-dataset COLUMN_NO</dt>
93+
<dd>
94+
if COLUMN_NO is greater than -1, generate JSON lines output suitable for
95+
loading into a dataset collection, using the value in column COLUMN_NO (zero-based) as the key.
96+
</dd>
9297
</dl>
9398
<h1 id="examples">EXAMPLES</h1>
9499
<p>Convert data1.csv to data1.jsonl using Unix pipes.</p>

csv2jsonl.1.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
%csv2jsonl(1) user manual | version 1.3.2 1ee0728
1+
%csv2jsonl(1) user manual | version 1.3.3 24eb061
22
% R. S. Doiel
3-
% 2025-05-13
3+
% 2025-05-14
44

55
# NAME
66

@@ -59,6 +59,9 @@ is one object per line. See https://jsonlines.org.
5959
-use-lazy-quotes
6060
: use lazy quotes for CSV input
6161

62+
-for-dataset COLUMN_NO
63+
: if COLUMN_NO is greater than -1, generate JSON lines output suitable for loading into a dataset collection,
64+
using the value in column COLUMN_NO (zero-based) as the key.
6265

6366
# EXAMPLES
6467

csv2mdtable.1.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ <h1 id="examples">EXAMPLES</h1>
7979
<pre><code> cat data1.csv | csv2mdtable &gt; data1.md</code></pre>
8080
<p>Convert data1.csv to data1.md using options.</p>
8181
<pre><code> csv2mdtable -i data1.csv -o data1.md</code></pre>
82-
<p>csv2mdtable 1.3.2</p>
82+
<p>csv2mdtable 1.3.3</p>
8383
</section>
8484

8585
<footer>

0 commit comments

Comments
 (0)