Skip to content

Commit ca3223e

Browse files
committed
Quick Save
1 parent a817367 commit ca3223e

File tree

2 files changed

+61
-16
lines changed

2 files changed

+61
-16
lines changed

TODO.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
+ [ ] csvrotate would take a CSV file as input and output columns as rows
2828
+ [ ] json2csv would convert a 2d JSON array to CSV output, it would convert a JSON object/map to a column of keys next to a column of values
2929
+ E.g. `cat data.json | json2csv`
30+
+ [ ] smartcat would function like cat but with support for ranges of lines (e.g. show me last 20 lines: smartcat -start=0 -end="-20" file.txt; cat starting with 10th line: smartcat -start=10 file.txt)
31+
+ [ ] allow prefix line number with a specific delimiter (E.g. comma would let you cat a CSV file adding row numbers as first column)
32+
+ [ ] show lines with prefix, suffix, containing or regexp
33+
+ [ ] show lines without prefix, suffix, containing or regexp
3034

3135
## Completed
3236

cmds/csvjoin/csvjoin.go

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ merged-data.csv..
7878
maxEditDistance int
7979
stopWordsOption string
8080
allowDuplicates bool
81+
asInMemory bool
8182
)
8283

8384
// cellsMatch checks if two cells' values match
@@ -109,9 +110,9 @@ func cellsMatch(val1, val2 string, stopWords []string) bool {
109110
return false
110111
}
111112

112-
func scanTable(w *csv.Writer, rowA []string, col1 int, table [][]string, col2 int, stopWords []string) {
113+
func scanTable(w *csv.Writer, rowA []string, col1 int, table [][]string, col2 int, stopWords []string) error {
113114
if col1 >= len(rowA) {
114-
return
115+
return nil
115116
}
116117
val1 := rowA[col1]
117118
if trimSpaces == true {
@@ -131,15 +132,22 @@ func scanTable(w *csv.Writer, rowA []string, col1 int, table [][]string, col2 in
131132
// We have a match, join the two rows and output
132133
combinedRows := append(rowA, rowB...)
133134
if err := w.Write(combinedRows); err != nil {
134-
fmt.Fprintf(os.Stderr, "Can't write csv row line %d of table 2, %s\n", i, err)
135-
return
135+
return fmt.Errorf("Can't write csv row line %d of table 2, %s\n", i, err)
136+
}
137+
w.Flush()
138+
if verbose == true {
139+
fmt.Print("*")
140+
}
141+
if err := w.Error(); err != nil {
142+
return err
136143
}
137144
if allowDuplicates == false {
138-
return
145+
return nil
139146
}
140147
}
141148
}
142149
}
150+
return nil
143151
}
144152

145153
func init() {
@@ -169,6 +177,7 @@ func init() {
169177
flag.StringVar(&stopWordsOption, "stop-words", "", "a column delimited list of stop words to ingnore when matching")
170178
flag.BoolVar(&allowDuplicates, "allow-duplicates", true, "allow duplicates when searching for matches")
171179
flag.BoolVar(&trimSpaces, "trim-spaces", false, "trim spaces around cell values before comparing")
180+
flag.BoolVar(&asInMemory, "in-memory", false, "if set to true it will read both CSV files into memory which can speed update processing")
172181
}
173182

174183
func main() {
@@ -273,19 +282,51 @@ func main() {
273282
stopWords := strings.Split(stopWordsOption, ":")
274283
w := csv.NewWriter(out)
275284
lineNo := 0 // line number of csv 1 table
276-
for {
277-
rowA, err := csv1.Read()
278-
if err == io.EOF {
279-
break
285+
if asInMemory == false {
286+
for {
287+
rowA, err := csv1.Read()
288+
if err == io.EOF {
289+
break
290+
}
291+
if err != nil {
292+
fmt.Fprintf(os.Stderr, "%d %s\n", lineNo, err)
293+
} else {
294+
if col1 < len(rowA) && rowA[col1] != "" {
295+
// We are relying on the side effect of writing the CSV output in scanTable
296+
if err := scanTable(w, rowA, col1, csv2Table, col2, stopWords); err != nil {
297+
fmt.Fprintf(os.Stderr, "Can't write CSV at line %d of csv table 1, %s\n", lineNo, err)
298+
}
299+
}
300+
if verbose == true {
301+
if (lineNo%100) == 0 && lineNo > 0 {
302+
fmt.Fprintf(os.Stderr, "\n%d rows of %s processed\n", lineNo, csv1FName)
303+
} else {
304+
fmt.Fprintf(os.Stderr, ".")
305+
}
306+
}
307+
}
308+
lineNo++
280309
}
281-
if err != nil {
282-
fmt.Fprintf(os.Stderr, "%d %s\n", lineNo, err)
283-
} else {
310+
} else {
311+
csv1Table := [][]string{}
312+
313+
// Read table 1 into memory
314+
for {
315+
record, err := csv1.Read()
316+
if err == io.EOF {
317+
break
318+
}
319+
if err != nil {
320+
fmt.Fprintf(os.Stderr, "%s, %s\n", csv1FName, err)
321+
fmt.Fprintf(os.Stderr, "%T %+v\n", record, record)
322+
}
323+
csv1Table = append(csv1Table, record)
324+
}
325+
// For each row in table one scan table two.
326+
for i, rowA := range csv1Table {
284327
if col1 < len(rowA) && rowA[col1] != "" {
285328
// We are relying on the side effect of writing the CSV output in scanTable
286-
scanTable(w, rowA, col1, csv2Table, col2, stopWords)
287-
w.Flush()
288-
if err := w.Error(); err != nil {
329+
if err := scanTable(w, rowA, col1, csv2Table, col2, stopWords); err != nil {
289330
fmt.Fprintf(os.Stderr, "Can't write CSV at line %d of csv table 1, %s\n", lineNo, err)
290331
}
291332
}
@@ -296,8 +337,8 @@ func main() {
296337
fmt.Fprintf(os.Stderr, ".")
297338
}
298339
}
340+
lineNo = i
299341
}
300-
lineNo++
301342
}
302343
w.Flush()
303344
if err := w.Error(); err != nil {

0 commit comments

Comments
 (0)