Skip to content

Commit 1291415

Browse files
authored
sqlite-utils bulk command
* sqlite-utils bulk command, closes #375 * Refactor import_options and insert_upsert_options, refs #377 * Tests for sqlite-utils bulk, refs #377 * Documentation for sqlite-utils bulk, refs #377
1 parent 1b84c17 commit 1291415

File tree

3 files changed

+211
-43
lines changed

3 files changed

+211
-43
lines changed

docs/cli.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,36 @@ The command will fail if you reference columns that do not exist on the table. T
10781078
.. note::
10791079
``upsert`` in sqlite-utils 1.x worked like ``insert ... --replace`` does in 2.x. See `issue #66 <https://github.com/simonw/sqlite-utils/issues/66>`__ for details of this change.
10801080

1081+
1082+
.. _cli_bulk:
1083+
1084+
Executing SQL in bulk
1085+
=====================
1086+
1087+
If you have a JSON, newline-delimited JSON, CSV or TSV file, you can use the ``sqlite-utils bulk`` command to execute a SQL query once for each record in that file.
1088+
1089+
The command takes the database file, the SQL to be executed and the file containing records to be used when evaluating the SQL query.
1090+
1091+
The SQL query should include ``:named`` parameters that match the keys in the records.
1092+
1093+
For example, given a ``chickens.csv`` CSV file containing the following::
1094+
1095+
id,name
1096+
1,Blue
1097+
2,Snowy
1098+
3,Azi
1099+
4,Lila
1100+
5,Suna
1101+
6,Cardi
1102+
1103+
You could insert those rows into a pre-created ``chickens`` table like so::
1104+
1105+
$ sqlite-utils bulk chickens.db \
1106+
'insert into chickens (id, name) values (:id, :name)' \
1107+
chickens.csv --csv
1108+
1109+
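This command accepts the same input format options as the ``sqlite-utils insert`` command, so it defaults to expecting JSON but can accept other formats using ``--csv``, ``--tsv``, ``--nl`` or the other import options described above. Options that only make sense when writing to a table, such as ``--pk`` or ``--batch-size``, are not available.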
1110+
10811111
.. _cli_insert_files:
10821112

10831113
Inserting data from files

sqlite_utils/cli.py

Lines changed: 124 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,50 @@ def reset_counts(path, load_extension):
660660
db.reset_counts()
661661

662662

663+
# Options describing how the input file should be parsed. They are held in a
# tuple so the same set can be concatenated into a larger option tuple or
# applied on its own through import_options().
_import_options = (
    click.option(
        "--flatten",
        help='Flatten nested JSON objects, so {"a": {"b": 1}} becomes {"a_b": 1}',
        is_flag=True,
    ),
    click.option("--nl", help="Expect newline-delimited JSON", is_flag=True),
    click.option("-c", "--csv", help="Expect CSV input", is_flag=True),
    click.option("--tsv", help="Expect TSV input", is_flag=True),
    click.option(
        "--lines",
        help="Treat each line as a single value called 'line'",
        is_flag=True,
    ),
    click.option(
        "--text",
        help="Treat input as a single value called 'text'",
        is_flag=True,
    ),
    click.option("--convert", help="Python code to convert each item"),
    click.option(
        "--import",
        "imports",
        help="Python modules to import",
        type=str,
        multiple=True,
    ),
    click.option("--delimiter", help="Delimiter to use for CSV files"),
    click.option("--quotechar", help="Quote character to use for CSV/TSV"),
    click.option("--sniff", help="Detect delimiter and quote character", is_flag=True),
    click.option("--no-headers", help="CSV file has no header row", is_flag=True),
    click.option(
        "--encoding",
        help="Character encoding for input, defaults to utf-8",
    ),
)
699+
700+
701+
def import_options(fn):
    """Decorate ``fn`` with every option listed in ``_import_options``.

    The options are applied last-to-first, which is equivalent to stacking
    them above ``fn`` as decorators in the order they appear in the tuple.
    Returns the fully decorated callable.
    """
    wrapped = fn
    for apply_option in _import_options[::-1]:
        wrapped = apply_option(wrapped)
    return wrapped
705+
706+
663707
def insert_upsert_options(fn):
664708
for decorator in reversed(
665709
(
@@ -673,40 +717,9 @@ def insert_upsert_options(fn):
673717
click.option(
674718
"--pk", help="Columns to use as the primary key, e.g. id", multiple=True
675719
),
676-
click.option(
677-
"--flatten",
678-
is_flag=True,
679-
help='Flatten nested JSON objects, so {"a": {"b": 1}} becomes {"a_b": 1}',
680-
),
681-
click.option("--nl", is_flag=True, help="Expect newline-delimited JSON"),
682-
click.option("-c", "--csv", is_flag=True, help="Expect CSV input"),
683-
click.option("--tsv", is_flag=True, help="Expect TSV input"),
684-
click.option(
685-
"--lines",
686-
is_flag=True,
687-
help="Treat each line as a single value called 'line'",
688-
),
689-
click.option(
690-
"--text",
691-
is_flag=True,
692-
help="Treat input as a single value called 'text'",
693-
),
694-
click.option("--convert", help="Python code to convert each item"),
695-
click.option(
696-
"--import",
697-
"imports",
698-
type=str,
699-
multiple=True,
700-
help="Python modules to import",
701-
),
702-
click.option("--delimiter", help="Delimiter to use for CSV files"),
703-
click.option("--quotechar", help="Quote character to use for CSV/TSV"),
704-
click.option(
705-
"--sniff", is_flag=True, help="Detect delimiter and quote character"
706-
),
707-
click.option(
708-
"--no-headers", is_flag=True, help="CSV file has no header row"
709-
),
720+
)
721+
+ _import_options
722+
+ (
710723
click.option(
711724
"--batch-size", type=int, default=100, help="Commit every X records"
712725
),
@@ -726,10 +739,6 @@ def insert_upsert_options(fn):
726739
type=(str, str),
727740
help="Default value that should be set for a column",
728741
),
729-
click.option(
730-
"--encoding",
731-
help="Character encoding for input, defaults to utf-8",
732-
),
733742
click.option(
734743
"-d",
735744
"--detect-types",
@@ -767,6 +776,7 @@ def insert_upsert_implementation(
767776
quotechar,
768777
sniff,
769778
no_headers,
779+
encoding,
770780
batch_size,
771781
alter,
772782
upsert,
@@ -775,11 +785,11 @@ def insert_upsert_implementation(
775785
truncate=False,
776786
not_null=None,
777787
default=None,
778-
encoding=None,
779788
detect_types=None,
780789
analyze=False,
781790
load_extension=None,
782791
silent=False,
792+
bulk_sql=None,
783793
):
784794
db = sqlite_utils.Database(path)
785795
_load_extensions(db, load_extension)
@@ -886,6 +896,12 @@ def insert_upsert_implementation(
886896
# Apply {"$base64": true, ...} decoding, if needed
887897
docs = (decode_base64_values(doc) for doc in docs)
888898

899+
# For bulk_sql= we use cursor.executemany() instead
900+
if bulk_sql:
901+
with db.conn:
902+
db.conn.cursor().executemany(bulk_sql, docs)
903+
return
904+
889905
try:
890906
db[table].insert_all(
891907
docs, pk=pk, batch_size=batch_size, alter=alter, **extra_kwargs
@@ -968,9 +984,9 @@ def insert(
968984
quotechar,
969985
sniff,
970986
no_headers,
987+
encoding,
971988
batch_size,
972989
alter,
973-
encoding,
974990
detect_types,
975991
analyze,
976992
load_extension,
@@ -1020,13 +1036,13 @@ def insert(
10201036
quotechar,
10211037
sniff,
10221038
no_headers,
1039+
encoding,
10231040
batch_size,
10241041
alter=alter,
10251042
upsert=False,
10261043
ignore=ignore,
10271044
replace=replace,
10281045
truncate=truncate,
1029-
encoding=encoding,
10301046
detect_types=detect_types,
10311047
analyze=analyze,
10321048
load_extension=load_extension,
@@ -1058,10 +1074,10 @@ def upsert(
10581074
quotechar,
10591075
sniff,
10601076
no_headers,
1077+
encoding,
10611078
alter,
10621079
not_null,
10631080
default,
1064-
encoding,
10651081
detect_types,
10661082
analyze,
10671083
load_extension,
@@ -1090,12 +1106,12 @@ def upsert(
10901106
quotechar,
10911107
sniff,
10921108
no_headers,
1109+
encoding,
10931110
batch_size,
10941111
alter=alter,
10951112
upsert=True,
10961113
not_null=not_null,
10971114
default=default,
1098-
encoding=encoding,
10991115
detect_types=detect_types,
11001116
analyze=analyze,
11011117
load_extension=load_extension,
@@ -1105,6 +1121,71 @@ def upsert(
11051121
raise click.ClickException(UNICODE_ERROR.format(ex))
11061122

11071123

1124+
@cli.command()
@click.argument(
    "path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("sql")
@click.argument("file", type=click.File("rb"), required=True)
@import_options
@load_extension_option
def bulk(
    path,
    file,
    sql,
    flatten,
    nl,
    csv,
    tsv,
    lines,
    text,
    convert,
    imports,
    delimiter,
    quotechar,
    sniff,
    no_headers,
    encoding,
    load_extension,
):
    """
    Execute parameterized SQL against the provided list of documents.
    """
    # Arguments the shared implementation requires but that this command does
    # not expose on its command line; they are pinned to fixed values.
    fixed_settings = dict(
        table=None,
        pk=None,
        batch_size=1,
        alter=False,
        upsert=False,
        not_null=set(),
        default={},
        detect_types=False,
        silent=False,
    )
    try:
        # Passing bulk_sql makes the implementation run the statement with
        # cursor.executemany() over the parsed records.
        insert_upsert_implementation(
            path=path,
            file=file,
            flatten=flatten,
            nl=nl,
            csv=csv,
            tsv=tsv,
            lines=lines,
            text=text,
            convert=convert,
            imports=imports,
            delimiter=delimiter,
            quotechar=quotechar,
            sniff=sniff,
            no_headers=no_headers,
            encoding=encoding,
            load_extension=load_extension,
            bulk_sql=sql,
            **fixed_settings,
        )
    except (sqlite3.OperationalError, sqlite3.IntegrityError) as ex:
        # Report SQL failures as a regular CLI error message.
        raise click.ClickException(str(ex))
1187+
1188+
11081189
@cli.command(name="create-database")
11091190
@click.argument(
11101191
"path",

tests/test_cli_bulk.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from click.testing import CliRunner
2+
from sqlite_utils import cli, Database
3+
import pathlib
4+
import pytest
5+
6+
7+
@pytest.fixture
def test_db_and_path(tmpdir):
    """Create ``data.db`` under ``tmpdir`` with a two-row ``example`` table.

    Returns a ``(Database, path string)`` pair.
    """
    seed_rows = [
        {"id": 1, "name": "One"},
        {"id": 2, "name": "Two"},
    ]
    db_path = str(pathlib.Path(tmpdir) / "data.db")
    db = Database(db_path)
    db["example"].insert_all(seed_rows, pk="id")
    return db, db_path
19+
20+
21+
def test_cli_bulk(test_db_and_path):
    """``bulk`` runs the insert once per newline-delimited JSON record on stdin."""
    db, db_path = test_db_and_path
    cli_args = [
        "bulk",
        db_path,
        "insert into example (id, name) values (:id, :name)",
        "-",
        "--nl",
    ]
    stdin_records = '{"id": 3, "name": "Three"}\n{"id": 4, "name": "Four"}\n'
    result = CliRunner().invoke(cli.cli, cli_args, input=stdin_records)
    assert result.exit_code == 0, result.output
    # The two fixture rows must be followed by the two rows supplied on stdin.
    expected_rows = [
        {"id": 1, "name": "One"},
        {"id": 2, "name": "Two"},
        {"id": 3, "name": "Three"},
        {"id": 4, "name": "Four"},
    ]
    assert list(db["example"].rows) == expected_rows
41+
42+
43+
def test_cli_bulk_error(test_db_and_path):
    """Invalid SQL passed to ``bulk`` exits with code 1 and a one-line error."""
    _, db_path = test_db_and_path
    # "value" (instead of "values") is the deliberate syntax error.
    cli_args = [
        "bulk",
        db_path,
        "insert into example (id, name) value (:id, :name)",
        "-",
        "--nl",
    ]
    result = CliRunner().invoke(
        cli.cli,
        cli_args,
        input='{"id": 3, "name": "Three"}',
    )
    assert result.exit_code == 1
    assert result.output == 'Error: near "value": syntax error\n'

0 commit comments

Comments
 (0)