Skip to content

Commit 9258f4b

Browse files
committed
sqlite-utils memory --analyze, closes #320
1 parent d7b1024 commit 9258f4b

File tree

3 files changed

+67
-5
lines changed

3 files changed

+67
-5
lines changed

docs/cli.rst

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,10 +328,10 @@ The CSV data that was piped into the script is available in the ``stdin`` table,
328328

329329
.. _cli_memory_schema_dump_save:
330330

331-
\-\-schema, \-\-dump and \-\-save
332-
---------------------------------
331+
\-\-schema, \-\-analyze, \-\-dump and \-\-save
332+
----------------------------------------------
333333

334-
To see the schema that will be created for a file or multiple files, use ``--schema``::
334+
To see the in-memory database schema that would be used for a file or for multiple files, use ``--schema``::
335335

336336
% sqlite-utils memory dogs.csv --schema
337337
CREATE TABLE [dogs] (
@@ -342,6 +342,33 @@ To see the schema that will be created for a file or multiple files, use ``--sch
342342
CREATE VIEW t1 AS select * from [dogs];
343343
CREATE VIEW t AS select * from [dogs];
344344

345+
You can run the equivalent of the :ref:`analyze-tables <cli_analyze_tables>` command using ``--analyze``::
346+
347+
% sqlite-utils memory dogs.csv --analyze
348+
dogs.id: (1/3)
349+
350+
Total rows: 2
351+
Null rows: 0
352+
Blank rows: 0
353+
354+
Distinct values: 2
355+
356+
dogs.name: (2/3)
357+
358+
Total rows: 2
359+
Null rows: 0
360+
Blank rows: 0
361+
362+
Distinct values: 2
363+
364+
dogs.age: (3/3)
365+
366+
Total rows: 2
367+
Null rows: 0
368+
Blank rows: 0
369+
370+
Distinct values: 2
371+
345372
You can output SQL that will both create the tables and insert the full data used to populate the in-memory database using ``--dump``::
346373

347374
% sqlite-utils memory dogs.csv --dump

sqlite_utils/cli.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,11 @@ def query(
12161216
type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
12171217
help="Save in-memory database to this file",
12181218
)
1219+
@click.option(
1220+
"--analyze",
1221+
is_flag=True,
1222+
help="Analyze resulting tables and output results",
1223+
)
12191224
@load_extension_option
12201225
def memory(
12211226
paths,
@@ -1236,6 +1241,7 @@ def memory(
12361241
schema,
12371242
dump,
12381243
save,
1244+
analyze,
12391245
load_extension,
12401246
):
12411247
"""Execute SQL query against an in-memory database, optionally populated by imported data
@@ -1265,8 +1271,8 @@ def memory(
12651271
sqlite-utils memory animals.csv --schema
12661272
"""
12671273
db = sqlite_utils.Database(memory=True)
1268-
# If --dump or --save used but no paths detected, assume SQL query is a path:
1269-
if (dump or save or schema) and not paths:
1274+
# If --schema, --dump, --save or --analyze used but no paths detected, assume SQL query is a path:
1275+
if (dump or save or schema or analyze) and not paths:
12701276
paths = [sql]
12711277
sql = None
12721278
for i, path in enumerate(paths):
@@ -1299,6 +1305,10 @@ def memory(
12991305
if not db[view_name].exists():
13001306
db.create_view(view_name, "select * from [{}]".format(csv_table))
13011307

1308+
if analyze:
1309+
_analyze(db, tables=None, columns=None, save=False)
1310+
return
1311+
13021312
if dump:
13031313
for line in db.conn.iterdump():
13041314
click.echo(line)
@@ -1922,6 +1932,10 @@ def analyze_tables(
19221932
"Analyze the columns in one or more tables"
19231933
db = sqlite_utils.Database(path)
19241934
_load_extensions(db, load_extension)
1935+
_analyze(db, tables, columns, save)
1936+
1937+
1938+
def _analyze(db, tables, columns, save):
19251939
if not tables:
19261940
tables = db.table_names()
19271941
todo = []

tests/test_cli_memory.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,24 @@ def test_memory_no_detect_types(option):
220220
{"id": "1", "name": "Cleo", "weight": "45.5"},
221221
{"id": "2", "name": "Bants", "weight": "3.5"},
222222
]
223+
224+
225+
def test_memory_analyze():
226+
result = CliRunner().invoke(
227+
cli.cli,
228+
["memory", "-", "--analyze"],
229+
input="id,name\n1,Cleo\n2,Bants",
230+
)
231+
assert result.exit_code == 0
232+
assert result.output == (
233+
"stdin.id: (1/2)\n\n"
234+
" Total rows: 2\n"
235+
" Null rows: 0\n"
236+
" Blank rows: 0\n\n"
237+
" Distinct values: 2\n\n"
238+
"stdin.name: (2/2)\n\n"
239+
" Total rows: 2\n"
240+
" Null rows: 0\n"
241+
" Blank rows: 0\n\n"
242+
" Distinct values: 2\n\n"
243+
)

0 commit comments

Comments
 (0)