Skip to content

Commit 9585ce2

Browse files
committed
Create csv2sql.go
Source code file - first commit for version 0.5
1 parent a6aa649 commit 9585ce2

File tree

1 file changed

+372
-0
lines changed

1 file changed

+372
-0
lines changed

csv2sql.go

Lines changed: 372 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,372 @@
1+
/*
2+
3+
csv2sql - conversion program to convert a csv file to sql format
4+
to allow easy checking / validation, and import into a SQLite3
5+
database using SQLite '.read' command
6+
7+
author: simon rowe <[email protected]>
8+
license: open-source released under "New BSD License"
9+
10+
version: 0.5
11+
created: 16th April 2014 - initial outline code written
12+
updated: 17th April 2014 - add flags and output file handling
13+
updated: 27th April 2014 - wrap in double quotes instead of single
14+
updated: 28th April 2014 - add flush io file buffer to fix SQL missing EOF
15+
updated: 19th July 2014 - add more help text, tidy up comments and code
16+
updated: 6th August 2014 - enabled the -k flag to alter the table header characters
17+
18+
*/
19+
package main
20+
21+
import (
22+
"bufio"
23+
"bytes"
24+
"encoding/csv"
25+
"flag"
26+
"fmt"
27+
"io"
28+
"os"
29+
"path/filepath"
30+
"strconv"
31+
"strings"
32+
"time"
33+
)
34+
35+
// Package level variables shared by the whole program.
var (
	// appversion is the version of the app - shown in the help banner.
	appversion = "0.5"

	// The variables below are filled in from the command line arguments by
	// the flag package - see init() for the flag each one is bound to.

	// tableName is the name of the SQLite table the CSV data is loaded into.
	tableName string
	// csvFileName is the name (and path) of the CSV file to be converted.
	csvFileName string
	// keepOrigCols when true leaves the CSV header fields unaltered when
	// they are used as the SQL table column names.
	keepOrigCols bool
	// debugSwitch when true prints additional debug output during the run.
	debugSwitch bool
)
45+
46+
// init() function - always runs before main() - used here to set-up required flags variables
47+
// from the command line parameters provided by the user when they run the app
48+
func init() {
49+
// IntVar; StringVar; BoolVar all required: variable, cmd line flag, initial value, description used by flag.Usage() on error / help
50+
flag.StringVar(&tableName, "t", "", "\tUSE: '-t tablename' where tablename is the name of the SQLite table to hold your CSV file data [MANDATORY]")
51+
flag.StringVar(&csvFileName, "f", "", "\tUSE: '-f filename.csv' where filename.csv is the name and path to a CSV file that contains your data for conversion [MANDATORY]")
52+
flag.BoolVar(&keepOrigCols, "k", false, "\tUSE: '-k=true' to keep original csv header fields as the SQL table column names")
53+
flag.BoolVar(&debugSwitch, "d", false, "\tUSE: '-d=true' to include additional debug output when run")
54+
}
55+
56+
//
57+
// FUNCTION: create a filename string for the SQL data to be written too - return it
58+
//
59+
func SQLFileName() (filename string) {
60+
// include the name of the csv file from command line (ie csvFileName)
61+
// remove any path etc
62+
var justFileName = filepath.Base(csvFileName)
63+
var extension = filepath.Ext(csvFileName)
64+
// remove the file extn
65+
justFileName = justFileName[0 : len(justFileName)-len(extension)]
66+
// get a date and time stamp - use GoLang reference date of: Mon Jan 2 15:04:05 MST 2006
67+
// TODO: figure out how to make this work - so filename has timestamp too ??
68+
//fileDate, err := time.Parse("2006-01-02", time.Now().String())
69+
//if err != nil {
70+
// panic(err)
71+
//}
72+
//fileDate := fileDate.String()
73+
//fmt.Printf("\n%s\n", fileDate)
74+
sqlOutFile := "SQL-" + justFileName + ".sql"
75+
return sqlOutFile
76+
}
77+
78+
//
79+
// FUNCTION: display a banner and help information on the screen
80+
// information is displayed when the program is run without including
81+
// any command line parameters - so assumes you want help to run it
82+
//
83+
func printBanner() {
84+
// add the help and about text to the variable 'about'
85+
about := `
86+
ABOUT CSV2SQL
87+
¯¯¯¯¯¯¯¯¯¯¯¯¯¯
88+
CVS2SQL is a small simple program specifically designed to quickly convert a
89+
coma separated value (CSV) file into structured query language (SQL)
90+
statements, that can then be used as an import source for an SQLite database.
91+
92+
The CSV file is also integrity checked while being converted to ensure it
93+
has a consistent number of column values throughout the file.
94+
95+
The first line of your CSV file will be designated as the header line - and
96+
therefore will become the column names in your subsequent SQLite database
97+
table.
98+
99+
Please note that any spaces or the following characters | - + @ # / \ : ( ) '
100+
will be replaced in the column names with the underscore character (ie '_').
101+
102+
This is to avoid SQL syntax import issues, and make any future SQL statements
103+
referencing these column names easier to construct. You can of course rename
104+
these characters in your CSV file first. Or use the command line switch
105+
' -k=true ' to force them to be left as is.
106+
107+
The rest of the CSV file will be split up on the comma character, on a per
108+
line basis. The eventual contents in your new database table will therefore
109+
be aligned to the column values - so each table row is a line from the CSV
110+
file.
111+
112+
The output filename (ie <sql-filename.sql>) will be created
113+
automatically for you when you run the program. Note that it will also
114+
overwrite / replace any existing file with the same name! The filename it
115+
will create will be based on your input filename, prefixed with 'SQL' and
116+
the file extension changed to '.sql'. So 'test-123.csv' -> 'SQL-test-123.sql'.
117+
118+
The newly generated output file will contain the SQL statements to allow
119+
the contents of your CSV file to be imported into a new SQLite database
120+
table. The table name to be used must be provide on the command line also
121+
as ' -t tablename ' - where tablename is the name of the SQLite table to
122+
hold your CSV file data.
123+
124+
To import the table and it contents, open your SQLite database with the
125+
sqlite3 program, and use: .read <sql-filename.sql>
126+
127+
Latest version is kept on GitHub here: https://github.com/wiremoons
128+
The program is written in Go - more information here: http://www.golang.org/
129+
More information on SQLite can be found here: http://www.sqlite.org/
130+
The program was written by Simon Rowe, licensed under "New BSD License"
131+
`
132+
// now display the information on screen
133+
fmt.Println("\n\t\t\tcsv2sql conversion program\n\t\t\t\tVersion:", appversion, "\n", about)
134+
}
135+
136+
//////////////////////////////////////////////////////////////////////////////
137+
//
138+
// MAIN STARTS HERE
139+
//
140+
//////////////////////////////////////////////////////////////////////////////
141+
142+
func main() {
143+
//-------------------------------------------------------------------------
144+
// sort out the command line arguments
145+
//-------------------------------------------------------------------------
146+
// get the command line args passed to the program
147+
flag.Parse()
148+
// if debug is enabled - confirm the command line parameters received
149+
if debugSwitch {
150+
fmt.Println("Command Line Arguments provided are:")
151+
fmt.Println("\tCSV file to use:", csvFileName)
152+
fmt.Println("\tSQL table name to use:", tableName)
153+
fmt.Println("\tKeep original csv header fields:", strconv.FormatBool(keepOrigCols))
154+
fmt.Println("\tDisplay additional debug output when run:", strconv.FormatBool(debugSwitch))
155+
}
156+
157+
// check we have a table name and csv file to work with - otherwise abort
158+
if csvFileName == "" || tableName == "" {
159+
printBanner()
160+
fmt.Println("ERROR: please provide both a 'table name' and the input 'CSV filename' to use\nrun 'csv2sql --help' for more information")
161+
//fmt.Println("Usage:",flag.Usage,"Command Line:",flag.CommandLine)
162+
os.Exit(-2)
163+
}
164+
165+
//-------------------------------------------------------------------------
166+
// open and prepare the CSV input file
167+
//-------------------------------------------------------------------------
168+
if debugSwitch {
169+
fmt.Println("Opening the CSV file:", csvFileName)
170+
}
171+
// open the CSV file - name provided via command line input - handle 'file'
172+
file, err := os.Open(csvFileName)
173+
// error - if we have one exit as CSV file not right
174+
if err != nil {
175+
fmt.Printf("ERROR: %s\n", err)
176+
os.Exit(-3)
177+
}
178+
// now file is open - defer the close of CSV file handle until we return
179+
defer file.Close()
180+
// connect a CSV reader to the file handle - which is the actual opened
181+
// CSV file
182+
// TODO : is there an error from this to check?
183+
reader := csv.NewReader(file)
184+
185+
//-------------------------------------------------------------------------
186+
// open and prepare the SQL output file
187+
//-------------------------------------------------------------------------
188+
// get a new filename to write the SQl converted data into - call our
189+
// function SQLFileName() to obtain a suitable string for the new filename
190+
// TODO : ad option to output to stdout instead of a file only
191+
sqlOutFile := SQLFileName()
192+
if debugSwitch {
193+
fmt.Println("Opening the SQL output file:", sqlOutFile)
194+
}
195+
196+
// open the new file using the name we obtained above - handle 'filesql'
197+
filesql, err := os.Create(sqlOutFile)
198+
// error - if we have one when trying open & create the new file
199+
if err != nil {
200+
fmt.Println("Error:", err)
201+
return
202+
}
203+
// now new file is open - defer the close of the file handle until we return
204+
defer filesql.Close()
205+
// attach the opened new sql file handle to a buffered file writer
206+
// the buffered file writer has the handle 'sqlFileBuffer'
207+
sqlFileBuffer := bufio.NewWriter(filesql)
208+
209+
//-------------------------------------------------------------------------
210+
// prepare to read the each line of the CSV file - and write out to the SQl
211+
//-------------------------------------------------------------------------
212+
// track the number of lines in the csv file
213+
lineCount := 0
214+
// track number of fields in csv file
215+
csvFields := 0
216+
217+
// grab time now - so can calculate how long it takes to process the file
218+
start := time.Now()
219+
220+
// create a buffer to hold each line of the SQL file as we build it
221+
// handle to this buffer is called 'strbuffer'
222+
var strbuffer bytes.Buffer
223+
224+
// START - processing of each line in the CSV input file
225+
//-------------------------------------------------------------------------
226+
// loop through the csv file until EOF - or until we hit an error in parsing it.
227+
// Data is read in for each line of the csv file and held in the variable
228+
// 'record'. Build a string for each line - wrapped with the SQL and
229+
// then output to the SQL file writer in its completed new form
230+
//-------------------------------------------------------------------------
231+
for {
232+
record, err := reader.Read()
233+
234+
// if we hit end of file (EOF) or another unexpected error
235+
if err == io.EOF {
236+
break
237+
} else if err != nil {
238+
fmt.Println("Error:", err)
239+
return
240+
}
241+
242+
// get the number of fields in the CSV file on this line
243+
csvFields = len(record)
244+
245+
// if we are processing the first line - use the record field contents
246+
// as the SQL table column names - add to the temp string 'strbuffer'
247+
// use the tablename provided by the user
248+
// TODO - add option to skip this line if user is adding data to an existing table?
249+
if lineCount == 0 {
250+
strbuffer.WriteString("PRAGMA foreign_keys=OFF;\nBEGIN TRANSACTION;\nCREATE TABLE " + tableName + " (")
251+
}
252+
253+
// if any line except the first one :
254+
// print the start of the SQL insert statement for the record
255+
// and - add to the temp string 'strbuffer'
256+
// use the tablename provided by the user
257+
if lineCount > 0 {
258+
strbuffer.WriteString("INSERT INTO " + tableName + " VALUES (")
259+
}
260+
// loop through each of the csv lines individual fields held in 'record'
261+
// len(record) tells us how many fields are on this line - so we loop right number of times
262+
for i := 0; i < len(record); i++ {
263+
// if we are processing the first line used for the table column name - update the
264+
// record field contents to remove the characters: space | - + @ # / \ : ( ) '
265+
// from the SQL table column names. Can be overridden on command line with '-k true'
266+
if (lineCount == 0) && (keepOrigCols == false) {
267+
// for debug - output info so we can see current field being processed
268+
if debugSwitch {
269+
fmt.Printf("Running header clean up for '%s' ", record[i])
270+
}
271+
// call the function cleanHeader to do clean up on this field
272+
record[i] = cleanHeader(record[i])
273+
// for debug - output info so we can see any changes now made
274+
if debugSwitch {
275+
fmt.Printf("changed to '%s'\n", record[i])
276+
}
277+
}
278+
// if a csv record field is empty or has the text "NULL" - replace it with actual NULL field in SQLite
279+
// otherwise just wrap the existing content with ''
280+
// TODO : make sure we don't try to create a 'NULL' table column name?
281+
if len(record[i]) == 0 || record[i] == "NULL" {
282+
strbuffer.WriteString("NULL")
283+
} else {
284+
strbuffer.WriteString("\"" + record[i] + "\"")
285+
}
286+
// if we have not reached the last record yet - add a coma also to the output
287+
if i < len(record)-1 {
288+
strbuffer.WriteString(",")
289+
}
290+
}
291+
// end of the line - so output SQL format required ');' and newline
292+
strbuffer.WriteString(");\n")
293+
// line of SQL is complete - so push out to the new SQL file
294+
bWritten, err := sqlFileBuffer.WriteString(strbuffer.String())
295+
// check it wrote data ok - otherwise report the error giving the line number affected
296+
if (err != nil) || (bWritten != len(strbuffer.Bytes())) {
297+
fmt.Printf("WARNING: Error writing to SQL file line %d: %s", lineCount, err)
298+
return
299+
}
300+
// reset the string buffer - so it is empty ready for the next line to build
301+
strbuffer.Reset()
302+
// for debug - show the line number we are processing from the CSV file
303+
if debugSwitch {
304+
fmt.Print("..", lineCount)
305+
}
306+
// increment the line count - and loop back around for next line of the CSV file
307+
lineCount += 1
308+
}
309+
// END - reached the end of processing each line of the input CSV file
310+
//
311+
if debugSwitch {
312+
fmt.Println("\ncsv file processing complete - outputted to the new SQL file: ", sqlOutFile)
313+
}
314+
// finished processing the csv input file lines - so close off the SQL statements
315+
strbuffer.WriteString("COMMIT;\n")
316+
// write out final line to the SQL file
317+
bWritten, err := sqlFileBuffer.WriteString(strbuffer.String())
318+
// check it wrote data ok - otherwise report the error giving the line number affected
319+
if (err != nil) || (bWritten != len(strbuffer.Bytes())) {
320+
fmt.Printf("WARNING: Error outputting final line of the SQL file: line %d: %s", lineCount, err)
321+
return
322+
}
323+
if debugSwitch {
324+
fmt.Println("SQL file write complete")
325+
}
326+
fmt.Println("\nDONE\n\tCSV file processing complete, and the new SQL file format was written to: ", sqlOutFile)
327+
// finished the SQl file data writing - flush any IO buffers
328+
// NB above required as the data was being lost otherwise - maybe a bug in go version 1.2 only?
329+
sqlFileBuffer.Flush()
330+
// reset the string buffer - so it is empty as it is no longer needed
331+
strbuffer.Reset()
332+
// stop the timer for the SQL file creation process
333+
end := time.Now()
334+
335+
// print out some stats about the csv file processed
336+
fmt.Println("\nSTATS\n\tCSV file", csvFileName, "has", lineCount, "lines with", csvFields, "CSV fields per record")
337+
fmt.Println("\tThe conversion took", end.Sub(start), "to run.\n\nAll is well.\n")
338+
}
339+
340+
//
341+
// headerCleaner swaps each of the characters: space | - + @ # / \ : ( ) '
// for an underscore. It is built once here and then used for every header
// field, and makes all of the replacements in a single pass over the text.
var headerCleaner = strings.NewReplacer(
	" ", "_",
	"|", "_",
	"-", "_",
	"+", "_",
	"@", "_",
	"#", "_",
	"/", "_",
	"\\", "_",
	":", "_",
	"(", "_",
	")", "_",
	"'", "_",
)

// cleanHeader receives a string and returns a copy of it in which each of the
// characters: space | - + @ # / \ : ( ) ' is replaced with an underscore.
// All other content of the string is returned unchanged.
//
// Function is used to clean up the CSV file header fields as they will be used
// for the table column names in our SQLite database. Therefore we don't want
// any odd characters in the table column names.
func cleanHeader(headField string) string {
	return headerCleaner.Replace(headField)
}
372+

0 commit comments

Comments
 (0)