import os
import sqlite3
import sys
from contextlib import closing
from glob import glob

import pandas as pd

# Name of the SQLite database file this script creates in the working directory.
DATABASE_NAME = "mimic4.db"
# Files whose on-disk size is below this many bytes are loaded in a single read.
THRESHOLD_SIZE = 5 * 10**7
# Number of CSV rows per chunk for files at or above THRESHOLD_SIZE.
CHUNKSIZE = 10**6

# Refuse to run when the database file is already present.
if os.path.exists(DATABASE_NAME):
    msg = "File {} already exists.".format(DATABASE_NAME)
    print(msg)
    sys.exit()

# A sqlite3 connection used as a context manager only commits (or rolls back
# on error) when the block exits; it does not close itself. closing() makes
# sure the handle is released, while entering the connection as the second
# manager keeps the commit-on-success / rollback-on-error behaviour.
with closing(sqlite3.connect(DATABASE_NAME)) as connection, connection:
    for f in glob("**/*.csv*", recursive=True):
        print("Starting processing {}".format(f))
        # Table name: lower-cased file name with a trailing ".gz" and then a
        # trailing ".csv" removed.
        tablename = os.path.basename(f).lower()
        if tablename.endswith('.gz'):
            tablename = tablename[:-3]
        if tablename.endswith('.csv'):
            tablename = tablename[:-4]
        if os.path.getsize(f) < THRESHOLD_SIZE:
            df = pd.read_csv(f)
            df.to_sql(tablename, connection)
        else:
            # If the file is too large, let's do the work in chunks
            for chunk in pd.read_csv(f, chunksize=CHUNKSIZE, low_memory=False):
                chunk.to_sql(tablename, connection, if_exists="append")
        print("Finished processing {}".format(f))

print("Should be all done!")