/*

   csv2sql - conversion program to convert a CSV file to SQL format
   to allow easy checking / validation, and import into an SQLite3
   database using the SQLite '.read' command

   author: simon rowe <[email protected]>
   license: open-source released under "New BSD License"

   version: 0.5
   created: 16th April 2014 - initial outline code written
   updated: 17th April 2014 - add flags and output file handling
   updated: 27th April 2014 - wrap in double quotes instead of single
   updated: 28th April 2014 - add flush of the io file buffer to fix SQL missing at EOF
   updated: 19th July 2014 - add more help text, tidy up comments and code
   updated: 6th August 2014 - enabled the -k flag to alter the table header characters

*/
package main

import (
	"bufio"
	"bytes"
	"encoding/csv"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
)

// global variables

// set the version of the app here
var appversion string = "0.5"

// the variables below are used by the flag package for the command line args
var tableName string
var csvFileName string
var keepOrigCols bool
var debugSwitch bool

// init() always runs before main() - used here to set up the flag variables
// from the command line parameters provided by the user when they run the app
func init() {
	// StringVar and BoolVar each take: variable, command line flag, initial value, and the description used by flag.Usage() on error / help
	flag.StringVar(&tableName, "t", "", "\tUSE: '-t tablename' where tablename is the name of the SQLite table to hold your CSV file data [MANDATORY]")
	flag.StringVar(&csvFileName, "f", "", "\tUSE: '-f filename.csv' where filename.csv is the name and path to a CSV file that contains your data for conversion [MANDATORY]")
	flag.BoolVar(&keepOrigCols, "k", false, "\tUSE: '-k=true' to keep the original csv header fields as the SQL table column names")
	flag.BoolVar(&debugSwitch, "d", false, "\tUSE: '-d=true' to include additional debug output when run")
}
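
// An illustrative invocation using the flags defined above (the table and
// file names here are assumptions, not taken from any real data set):
//
//   csv2sql -t staff -f staff-data.csv
//
// Add '-k=true' to keep the original CSV header text as the column names,
// and '-d=true' to print the extra debug output while it runs.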

//
// FUNCTION: create a filename string for the SQL data to be written to - and return it
//
func SQLFileName() string {
	// include the name of the csv file from the command line (ie csvFileName)
	// remove any path etc
	var justFileName = filepath.Base(csvFileName)
	var extension = filepath.Ext(csvFileName)
	// remove the file extension
	justFileName = justFileName[0 : len(justFileName)-len(extension)]
	// get a date and time stamp - use the Go reference date of: Mon Jan 2 15:04:05 MST 2006
	// TODO: figure out how to make this work - so the filename has a timestamp too ??
	//fileDate, err := time.Parse("2006-01-02", time.Now().String())
	//if err != nil {
	//	panic(err)
	//}
	//fileDate := fileDate.String()
	//fmt.Printf("\n%s\n", fileDate)
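	// A minimal sketch of one way to do the above (an assumption, left
	// commented out rather than wired in): time.Format takes a layout built
	// from the reference date, so a sortable stamp could be added like so:
	//
	//   fileStamp := time.Now().Format("2006-01-02_150405")
	//   sqlOutFile := "SQL-" + justFileName + "-" + fileStamp + ".sql"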
	sqlOutFile := "SQL-" + justFileName + ".sql"
	return sqlOutFile
}

//
// FUNCTION: display a banner and help information on the screen
// the information is displayed when the program is run without any
// command line parameters - so it assumes you want help to run it
//
func printBanner() {
	// add the help and about text to the variable 'about'
	about := `
	ABOUT CSV2SQL
	¯¯¯¯¯¯¯¯¯¯¯¯¯¯
	CSV2SQL is a small, simple program specifically designed to quickly convert a
	comma separated value (CSV) file into structured query language (SQL)
	statements that can then be used as an import source for an SQLite database.

	The CSV file is also integrity checked while being converted, to ensure it
	has a consistent number of column values throughout the file.

	The first line of your CSV file will be designated as the header line - and
	therefore will become the column names in your subsequent SQLite database
	table.

	Please note that any spaces or the following characters | - + @ # / \ : ( ) '
	will be replaced in the column names with the underscore character (ie '_').

	This is to avoid SQL syntax import issues, and to make any future SQL statements
	referencing these column names easier to construct. You can of course rename
	these characters in your CSV file first, or use the command line switch
	' -k=true ' to force them to be left as is.

	The rest of the CSV file will be split up on the comma character, on a per
	line basis. The eventual contents of your new database table will therefore
	be aligned to the column values - so each table row is a line from the CSV
	file.

	The output filename (ie <sql-filename.sql>) will be created
	automatically for you when you run the program. Note that it will also
	overwrite / replace any existing file with the same name! The filename it
	will create will be based on your input filename, prefixed with 'SQL' and
	the file extension changed to '.sql'. So 'test-123.csv' -> 'SQL-test-123.sql'.

	The newly generated output file will contain the SQL statements to allow
	the contents of your CSV file to be imported into a new SQLite database
	table. The table name to be used must also be provided on the command line
	as ' -t tablename ' - where tablename is the name of the SQLite table to
	hold your CSV file data.

	To import the table and its contents, open your SQLite database with the
	sqlite3 program, and use: .read <sql-filename.sql>

	Latest version is kept on GitHub here: https://github.com/wiremoons
	The program is written in Go - more information here: http://www.golang.org/
	More information on SQLite can be found here: http://www.sqlite.org/
	The program was written by Simon Rowe, licensed under the "New BSD License"
	`
	// now display the information on screen
	fmt.Println("\n\t\t\tcsv2sql conversion program\n\t\t\t\tVersion:", appversion, "\n", about)
}
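
// For reference, once the generated SQL file exists it can be loaded into an
// SQLite database from the sqlite3 shell with the '.read' command, for
// example (the database filename here is illustrative only):
//
//   sqlite3 mydata.db
//   sqlite> .read SQL-test-123.sql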

//////////////////////////////////////////////////////////////////////////////
//
// MAIN STARTS HERE
//
//////////////////////////////////////////////////////////////////////////////

func main() {
	//-------------------------------------------------------------------------
	// sort out the command line arguments
	//-------------------------------------------------------------------------
	// get the command line args passed to the program
	flag.Parse()
	// if debug is enabled - confirm the command line parameters received
	if debugSwitch {
		fmt.Println("Command Line Arguments provided are:")
		fmt.Println("\tCSV file to use:", csvFileName)
		fmt.Println("\tSQL table name to use:", tableName)
		fmt.Println("\tKeep original csv header fields:", strconv.FormatBool(keepOrigCols))
		fmt.Println("\tDisplay additional debug output when run:", strconv.FormatBool(debugSwitch))
	}

	// check we have a table name and csv file to work with - otherwise abort
	if csvFileName == "" || tableName == "" {
		printBanner()
		fmt.Println("ERROR: please provide both a 'table name' and the input 'CSV filename' to use\nrun 'csv2sql --help' for more information")
		//fmt.Println("Usage:",flag.Usage,"Command Line:",flag.CommandLine)
		os.Exit(-2)
	}

	//-------------------------------------------------------------------------
	// open and prepare the CSV input file
	//-------------------------------------------------------------------------
	if debugSwitch {
		fmt.Println("Opening the CSV file:", csvFileName)
	}
	// open the CSV file - name provided via command line input - handle 'file'
	file, err := os.Open(csvFileName)
	// error - if we have one, exit as the CSV file is not right
	if err != nil {
		fmt.Printf("ERROR: %s\n", err)
		os.Exit(-3)
	}
	// now the file is open - defer the close of the CSV file handle until we return
	defer file.Close()
	// connect a CSV reader to the open file handle. Note that csv.NewReader
	// does not return an error - any problems with the CSV data surface later
	// from reader.Read(), which by default also checks that every record has
	// the same number of fields as the first one
	reader := csv.NewReader(file)

	//-------------------------------------------------------------------------
	// open and prepare the SQL output file
	//-------------------------------------------------------------------------
	// get a new filename to write the SQL converted data into - call our
	// function SQLFileName() to obtain a suitable string for the new filename
	// TODO : add option to output to stdout instead of only to a file
	sqlOutFile := SQLFileName()
	if debugSwitch {
		fmt.Println("Opening the SQL output file:", sqlOutFile)
	}

	// open the new file using the name we obtained above - handle 'filesql'
	filesql, err := os.Create(sqlOutFile)
	// error - if we have one when trying to open & create the new file
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// now the new file is open - defer the close of the file handle until we return
	defer filesql.Close()
	// attach the opened new sql file handle to a buffered file writer
	// the buffered file writer has the handle 'sqlFileBuffer'
	sqlFileBuffer := bufio.NewWriter(filesql)

	//-------------------------------------------------------------------------
	// prepare to read each line of the CSV file - and write it out to the SQL
	//-------------------------------------------------------------------------
	// track the number of lines in the csv file
	lineCount := 0
	// track the number of fields in the csv file
	csvFields := 0

	// grab the time now - so we can calculate how long it takes to process the file
	start := time.Now()

	// create a buffer to hold each line of the SQL file as we build it
	// the handle to this buffer is called 'strbuffer'
	var strbuffer bytes.Buffer

	// START - processing of each line in the CSV input file
	//-------------------------------------------------------------------------
	// loop through the csv file until EOF - or until we hit an error parsing it.
	// Data is read in for each line of the csv file and held in the variable
	// 'record'. Build a string for each line - wrapped with the SQL - and
	// then output it to the SQL file writer in its completed new form.
	//-------------------------------------------------------------------------
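	// For illustration only (the sample data below is an assumption, not from
	// any real file), a CSV file containing:
	//
	//   Name,Age
	//   Alice,30
	//
	// run with '-t people' would produce SQL statements along these lines:
	//
	//   PRAGMA foreign_keys=OFF;
	//   BEGIN TRANSACTION;
	//   CREATE TABLE people ("Name","Age");
	//   INSERT INTO people VALUES ("Alice","30");
	//   COMMIT;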
	for {
		record, err := reader.Read()

		// stop if we hit end of file (EOF) - abort on any other unexpected error
		if err == io.EOF {
			break
		} else if err != nil {
			fmt.Println("Error:", err)
			return
		}

		// get the number of fields in the CSV file on this line
		csvFields = len(record)

		// if we are processing the first line - use the record field contents
		// as the SQL table column names - add to the temp string 'strbuffer'
		// using the tablename provided by the user
		// TODO - add option to skip this line if the user is adding data to an existing table?
		if lineCount == 0 {
			strbuffer.WriteString("PRAGMA foreign_keys=OFF;\nBEGIN TRANSACTION;\nCREATE TABLE " + tableName + " (")
		}

		// for any line except the first one :
		// print the start of the SQL insert statement for the record
		// and - add to the temp string 'strbuffer'
		// using the tablename provided by the user
		if lineCount > 0 {
			strbuffer.WriteString("INSERT INTO " + tableName + " VALUES (")
		}
		// loop through each of the csv line's individual fields held in 'record'
		// len(record) tells us how many fields are on this line - so we loop the right number of times
		for i := 0; i < len(record); i++ {
			// if we are processing the first line used for the table column names - update the
			// record field contents to remove the characters: space | - + @ # / \ : ( ) '
			// from the SQL table column names. Can be overridden on the command line with '-k=true'
			if (lineCount == 0) && (keepOrigCols == false) {
				// for debug - output info so we can see the current field being processed
				if debugSwitch {
					fmt.Printf("Running header clean up for '%s' ", record[i])
				}
				// call the function cleanHeader to do the clean up on this field
				record[i] = cleanHeader(record[i])
				// for debug - output info so we can see any changes now made
				if debugSwitch {
					fmt.Printf("changed to '%s'\n", record[i])
				}
			}
			// if a csv record field is empty or has the text "NULL" - replace it with an actual NULL field in SQLite
			// otherwise just wrap the existing content in double quotes
			// TODO : make sure we don't try to create a 'NULL' table column name?
			if len(record[i]) == 0 || record[i] == "NULL" {
				strbuffer.WriteString("NULL")
			} else {
				strbuffer.WriteString("\"" + record[i] + "\"")
			}
			// if we have not reached the last field yet - also add a comma to the output
			if i < len(record)-1 {
				strbuffer.WriteString(",")
			}
		}
		// end of the line - so output the SQL format required ');' and a newline
		strbuffer.WriteString(");\n")
		// the line of SQL is complete - so push it out to the new SQL file
		bWritten, err := sqlFileBuffer.WriteString(strbuffer.String())
		// check it wrote the data ok - otherwise report the error giving the line number affected
		if (err != nil) || (bWritten != len(strbuffer.Bytes())) {
			fmt.Printf("WARNING: Error writing to SQL file at line %d: %s", lineCount, err)
			return
		}
		// reset the string buffer - so it is empty ready for the next line to build
		strbuffer.Reset()
		// for debug - show the line number we are processing from the CSV file
		if debugSwitch {
			fmt.Print("..", lineCount)
		}
		// increment the line count - and loop back around for the next line of the CSV file
		lineCount++
	}
	// END - reached the end of processing each line of the input CSV file
	//
	if debugSwitch {
		fmt.Println("\ncsv file processing complete - output written to the new SQL file: ", sqlOutFile)
	}
	// finished processing the csv input file lines - so close off the SQL statements
	strbuffer.WriteString("COMMIT;\n")
	// write out the final line to the SQL file
	bWritten, err := sqlFileBuffer.WriteString(strbuffer.String())
	// check it wrote the data ok - otherwise report the error giving the line number affected
	if (err != nil) || (bWritten != len(strbuffer.Bytes())) {
		fmt.Printf("WARNING: Error outputting the final line of the SQL file: line %d: %s", lineCount, err)
		return
	}
	if debugSwitch {
		fmt.Println("SQL file write complete")
	}
	fmt.Println("\nDONE\n\tCSV file processing complete, and the new SQL file format was written to: ", sqlOutFile)
	// finished writing the SQL file data - flush the buffered writer so any
	// data still held in its buffer reaches the file before we return
	// (bufio.Writer only writes to the underlying file when its buffer fills
	// or Flush is called - without this the end of the SQL file was lost)
	sqlFileBuffer.Flush()
	// reset the string buffer - so it is empty as it is no longer needed
	strbuffer.Reset()
	// stop the timer for the SQL file creation process
	end := time.Now()

	// print out some stats about the csv file processed
	fmt.Println("\nSTATS\n\tCSV file", csvFileName, "has", lineCount, "lines with", csvFields, "CSV fields per record")
	fmt.Println("\tThe conversion took", end.Sub(start), "to run.\n\nAll is well.\n")
}

//
// cleanHeader receives a string and replaces the characters: space | - + @ # / \ : ( ) '
// with an underscore. It is used to clean up the CSV file header fields, as they will be
// used as the table column names in our SQLite database, and we don't want any odd
// characters in those column names.
//
func cleanHeader(headField string) string {
	// each problem character and its replacement, applied in a single pass
	headerReplacer := strings.NewReplacer(
		" ", "_",
		"|", "_",
		"-", "_",
		"+", "_",
		"@", "_",
		"#", "_",
		"/", "_",
		"\\", "_",
		":", "_",
		"(", "_",
		")", "_",
		"'", "_")
	return headerReplacer.Replace(headField)
}
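
// As an illustration (hypothetical header text, not from any real file),
// cleanHeader("Order Date (UTC)") returns "Order_Date__UTC_".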