From d1d780379ca5338fda046f363fcf2836f60dc96a Mon Sep 17 00:00:00 2001 From: phaer Date: Sun, 19 Sep 2021 09:46:56 +0200 Subject: [PATCH 1/5] Support lists in .extract_expand() --- sqlite_utils/db.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index fb1ae4f91..649ec3a12 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -1164,6 +1164,10 @@ def extract_expand( if isinstance(expanded, dict): new_pk = self.db[table].insert(expanded, pk="id", replace=True).last_pk self.update(row_pk, {fk_column: new_pk}) + elif isinstance(expanded, list): + for new_row in expanded: + new_pk = self.db[table].insert(new_row, pk="id", replace=True).last_pk + self.update(row_pk, {fk_column: new_pk}) # Can drop the original column now self.transform(drop=[column]) # And add that foreign key From d39ac10c035071abf3f8d2d604f5aa475485efab Mon Sep 17 00:00:00 2001 From: phaer Date: Sun, 19 Sep 2021 09:47:16 +0200 Subject: [PATCH 2/5] Throw ExpandError for unknown in .extract_expand() --- sqlite_utils/db.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 649ec3a12..5995e4ae9 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -144,6 +144,10 @@ class InvalidColumns(Exception): pass +class ExpandError(Exception): + pass + + _COUNTS_TABLE_CREATE_SQL = """ CREATE TABLE IF NOT EXISTS [{}]( [table] TEXT PRIMARY KEY, @@ -1168,6 +1172,9 @@ def extract_expand( for new_row in expanded: new_pk = self.db[table].insert(new_row, pk="id", replace=True).last_pk self.update(row_pk, {fk_column: new_pk}) + else: + raise ExpandError("expanded value needs to be list or dict") + # Can drop the original column now self.transform(drop=[column]) # And add that foreign key From 70423bb37f5355d8f04b83b2b3c1db1337e27be1 Mon Sep 17 00:00:00 2001 From: phaer Date: Sun, 19 Sep 2021 12:16:15 +0200 Subject: [PATCH 3/5] Allow specifying our_id in Table.m2m() --- sqlite_utils/db.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 5995e4ae9..3fe5455ce 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -2092,10 +2092,11 @@ def m2m( lookup=None, m2m_table=None, alter=False, + our_id = None ): if isinstance(other_table, str): other_table = self.db.table(other_table, pk=pk) - our_id = self.last_pk + our_id = our_id or self.last_pk if lookup is not None: assert record_or_iterable is None, "Provide lookup= or record, not both" else: From 17da3bf3434519cfb07c2ffbe155ea783faa366e Mon Sep 17 00:00:00 2001 From: phaer Date: Sun, 19 Sep 2021 12:22:18 +0200 Subject: [PATCH 4/5] Handle m2m and m21 in extract_expand() --- sqlite_utils/db.py | 37 +++++++++++++++++++++++++------- tests/test_extract.py | 49 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index 3fe5455ce..b0eecbe2f 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -1160,6 +1160,8 @@ def extract_expand( ): "Use expand function to transform values in column and extract them into a new table" table = table or column + # Track whether we are creating a many-to-many or many-to-one relation + m2m, m21 = (False, False) fk_column = fk_column or "{}_id".format(table) self.add_column(fk_column, fk_column_type) for row_pk, row in self.pks_and_rows_where(): @@ -1169,16 +1171,37 @@ def extract_expand( new_pk = self.db[table].insert(expanded, pk="id", replace=True).last_pk self.update(row_pk, {fk_column: new_pk}) elif isinstance(expanded, list): - for new_row in expanded: - new_pk = self.db[table].insert(new_row, pk="id", replace=True).last_pk - self.update(row_pk, {fk_column: new_pk}) + if not len(expanded): + continue + elif isinstance(expanded[0], dict): + m2m = True + self.m2m(table, expanded, pk="id", our_id=row_pk, alter=True) + else: + m21 = True + pk_column = "{}_id".format(self.name) + new_rows = [ + { + "id": index, + pk_column: row_pk, + "value": value, + } + for index, value in enumerate(expanded, start=1) + ] + self.db[table].insert_all( + new_rows, + pk=('id', pk_column), + foreign_keys=[(pk_column, self.name)], + replace=True) else: raise ExpandError("expanded value needs to be list or dict") - # Can drop the original column now - self.transform(drop=[column]) - # And add that foreign key - self.add_foreign_key(fk_column, table, "id") + if m21 or m2m: + self.transform(drop=[column, fk_column]) + else: + # Can drop the original column now + self.transform(drop=[column]) + # And add that foreign key + self.add_foreign_key(fk_column, table, "id") return self def create_index(self, columns, index_name=None, unique=False, if_not_exists=False): diff --git a/tests/test_extract.py b/tests/test_extract.py index 2507c758b..9396be4ff 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -193,3 +193,52 @@ def test_extract_expand(fresh_db): table="trees", column="species_id", other_table="species", other_column="id" ) ] + + +def test_extract_expand_m21(fresh_db): + fresh_db["trees"].insert( + {"id": 1, "names": '["Palm", "Arecaceae"]'}, + pk="id", + ) + assert fresh_db.table_names() == ["trees"] + fresh_db["trees"].extract_expand( + "names", expand=json.loads, table="names", pk="id" + ) + assert set(fresh_db.table_names()) == {"trees", "names"} + assert list(fresh_db["trees"].rows) == [ + {"id": 1}, + ] + assert list(fresh_db["names"].rows) == [ + {"id": 1, "trees_id": 1, "value": "Palm"}, + {"id": 2, "trees_id": 1, "value": "Arecaceae"}, + ] + assert fresh_db["names"].foreign_keys == [ + ForeignKey( + table="names", column="trees_id", other_table="trees", other_column="id" + ) + ] + + +def test_extract_expand_m2m(fresh_db): + fresh_db["trees"].insert( + {"id": 1, "tags": '[{"id": 1, "name": "warm-climate"}, {"id": 2, "name": "green-leaves"}]'}, + pk="id", + ) + assert fresh_db.table_names() == ["trees"] + fresh_db["trees"].extract_expand( + "tags", expand=json.loads, table="tags", pk="id" + ) + assert set(fresh_db.table_names()) == {"trees", "tags", "tags_trees"} + assert list(fresh_db["trees"].rows) == [{"id": 1}] + assert list(fresh_db["tags"].rows) == [ + {"id": 1, "name": "warm-climate"}, + {"id": 2, "name": "green-leaves"}, + ] + assert list(fresh_db["tags_trees"].rows) == [ + {"trees_id": 1, "tags_id": 1}, + {"trees_id": 1, "tags_id": 2}, + ] + assert fresh_db["tags_trees"].foreign_keys == [ + ForeignKey(table="tags_trees", column="trees_id", other_table="trees", other_column="id"), + ForeignKey(table="tags_trees", column="tags_id", other_table="tags", other_column="id") + ] From 2840c697aa9817462d864ed5f8a7696d749fe039 Mon Sep 17 00:00:00 2001 From: Paul Haerle Date: Mon, 11 Oct 2021 17:31:41 +0200 Subject: [PATCH 5/5] WIP: set m21 true for empty lists... ...quick hack to test whether that handles datasets with some empty json arrays better --- sqlite_utils/db.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlite_utils/db.py b/sqlite_utils/db.py index b0eecbe2f..878a3bb4e 100644 --- a/sqlite_utils/db.py +++ b/sqlite_utils/db.py @@ -1172,6 +1172,7 @@ def extract_expand( self.update(row_pk, {fk_column: new_pk}) elif isinstance(expanded, list): if not len(expanded): + m21 = True continue elif isinstance(expanded[0], dict): m2m = True