Skip to content

Commit 419f270

Browse files
committed
Fix bulk create with history with duplicates
Issue: 1032
1 parent 529dbe2 commit 419f270

File tree

4 files changed

+51
-45
lines changed

4 files changed

+51
-45
lines changed

AUTHORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ Authors
119119
- Tommy Beadle (`tbeadle <https://github.com/tbeadle>`_)
120120
- Trey Hunner (`treyhunner <https://github.com/treyhunner>`_)
121121
- Ulysses Vilela
122+
- Weston Chan
122123
- `vnagendra <https://github.com/vnagendra>`_
123124
- `yakimka <https://github.com/yakimka>`_
124125
- `Paulo Peres <https://github.com/PauloPeres>`_

CHANGES.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Unreleased
66

77
- Fixed typos in the docs
88
- Removed n+1 query from ``bulk_create_with_history`` utility (gh-975)
9+
- Removed extra history objects when using ``bulk_create_with_history`` with duplicates (gh-1032)
910
- Started using ``exists`` query instead of ``count`` in ``populate_history`` command (gh-982)
1011
- Added support for Django 4.1 (gh-1021)
1112

simple_history/tests/tests/test_utils.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from unittest.mock import Mock, patch
33

44
from django.contrib.auth import get_user_model
5+
from django.core.exceptions import FieldError
56
from django.db import IntegrityError, transaction
67
from django.test import TestCase, TransactionTestCase, override_settings
78
from django.utils import timezone
@@ -124,8 +125,8 @@ def test_bulk_create_history_with_default_date(self):
124125
all([history.history_date == date for history in Poll.history.all()])
125126
)
126127

127-
def test_bulk_create_history_num_queries_is_two(self):
128-
with self.assertNumQueries(2):
128+
def test_bulk_create_history_num_queries_is_three(self):
129+
with self.assertNumQueries(3):
129130
bulk_create_with_history(self.data, Poll)
130131

131132
def test_bulk_create_history_on_model_without_history_raises_error(self):
@@ -138,7 +139,7 @@ def test_bulk_create_history_on_model_without_history_raises_error(self):
138139
bulk_create_with_history(self.data, Place)
139140

140141
def test_num_queries_when_batch_size_is_less_than_total(self):
141-
with self.assertNumQueries(6):
142+
with self.assertNumQueries(7):
142143
bulk_create_with_history(self.data, Poll, batch_size=2)
143144

144145
def test_bulk_create_history_with_batch_size(self):
@@ -211,7 +212,7 @@ def mock_bulk_create(*args, **kwargs):
211212
with patch.object(
212213
Poll._default_manager, "bulk_create", side_effect=mock_bulk_create
213214
):
214-
with self.assertNumQueries(3):
215+
with self.assertNumQueries(4):
215216
result = bulk_create_with_history(objects, Poll)
216217
self.assertEqual(
217218
[poll.question for poll in result], [poll.question for poll in objects]
@@ -262,7 +263,7 @@ def test_bulk_create_history_rolls_back_when_last_exists(self):
262263
self.assertEqual(Poll.history.count(), 1)
263264

264265
def test_bulk_create_fails_with_wrong_model(self):
265-
with self.assertRaises(AttributeError):
266+
with self.assertRaises(FieldError):
266267
bulk_create_with_history(self.data, Document)
267268

268269
self.assertEqual(Poll.objects.count(), 0)
@@ -271,14 +272,11 @@ def test_bulk_create_fails_with_wrong_model(self):
271272
@patch("simple_history.utils.get_history_manager_for_model")
272273
def test_bulk_create_no_ids_return(self, hist_manager_mock):
273274
objects = [Place(id=1, name="Place 1")]
274-
model = Mock(
275-
_default_manager=Mock(
276-
bulk_create=Mock(return_value=[Place(name="Place 1")]),
277-
filter=Mock(return_value=Mock(order_by=Mock(return_value=objects))),
278-
),
279-
_meta=Mock(get_fields=Mock(return_value=[])),
275+
Place._default_manager.bulk_create = Mock(
276+
side_effect=Place._default_manager.bulk_create,
277+
return_value=[Place(name="Place 1")],
280278
)
281-
result = bulk_create_with_history(objects, model)
279+
result = bulk_create_with_history(objects, Place)
282280
self.assertEqual(result, objects)
283281
hist_manager_mock().bulk_history_create.assert_called_with(
284282
objects,
@@ -288,6 +286,18 @@ def test_bulk_create_no_ids_return(self, hist_manager_mock):
288286
default_date=None,
289287
)
290288

289+
def test_bulk_create_duplicate_objects_with_no_ids_return(self):
290+
Street._default_manager.create(id=1, name="Duplicate Place")
291+
obj = Street(name="Duplicate Place")
292+
Street._default_manager.bulk_create = Mock(
293+
side_effect=Street._default_manager.bulk_create,
294+
return_value=[Street(name="Duplicate Place")],
295+
)
296+
result = bulk_create_with_history([obj], Street)
297+
self.assertEqual(result, [Street._default_manager.last()])
298+
self.assertEqual(Street._default_manager.count(), 2)
299+
self.assertEqual(Street.log.count(), 2)
300+
291301

292302
class BulkCreateWithManyToManyField(TestCase):
293303
def setUp(self):

simple_history/utils.py

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -93,58 +93,52 @@ def bulk_create_with_history(
9393
history_manager = get_history_manager_for_model(model)
9494
model_manager = model._default_manager
9595

96-
second_transaction_required = True
9796
with transaction.atomic(savepoint=False):
97+
# Retrieve any duplicate existing objects,
98+
# so we can exclude them when finding the created objects
99+
cumulative_filter = None
100+
obj_when_list = []
101+
for i, obj in enumerate(objs):
102+
attributes = {
103+
k: v
104+
for k, v in model_to_dict(obj, exclude=exclude_fields).items()
105+
if v is not None
106+
}
107+
# https://stackoverflow.com/a/49625179/1960509
108+
# DEV: If an attribute has `then` as a key
109+
# then they'll also run into issues with `bulk_update`
110+
# due to shared implementation
111+
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
112+
obj_when_list.append(When(**attributes, then=i))
113+
q = Q(**attributes)
114+
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
115+
existing_objs_ids = list(
116+
model_manager.filter(cumulative_filter).values_list("pk", flat=True)
117+
)
98118
objs_with_id = model_manager.bulk_create(
99119
objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts
100120
)
101121
if objs_with_id and objs_with_id[0].pk and not ignore_conflicts:
102-
second_transaction_required = False
103122
history_manager.bulk_history_create(
104123
objs_with_id,
105124
batch_size=batch_size,
106125
default_user=default_user,
107126
default_change_reason=default_change_reason,
108127
default_date=default_date,
109128
)
110-
if second_transaction_required:
111-
with transaction.atomic(savepoint=False):
112-
# Generate a common query to avoid n+1 selections
113-
# https://github.com/jazzband/django-simple-history/issues/974
114-
cumulative_filter = None
115-
obj_when_list = []
116-
for i, obj in enumerate(objs_with_id):
117-
attributes = dict(
118-
filter(
119-
lambda x: x[1] is not None,
120-
model_to_dict(obj, exclude=exclude_fields).items(),
121-
)
122-
)
123-
q = Q(**attributes)
124-
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
125-
# https://stackoverflow.com/a/49625179/1960509
126-
# DEV: If an attribute has `then` as a key
127-
# then they'll also run into issues with `bulk_update`
128-
# due to shared implementation
129-
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
130-
obj_when_list.append(When(**attributes, then=i))
131-
obj_list = (
132-
list(
133-
model_manager.filter(cumulative_filter).order_by(
134-
Case(*obj_when_list)
135-
)
136-
)
137-
if objs_with_id
138-
else []
129+
elif objs_with_id:
130+
objs_with_id = list(
131+
model_manager.filter(cumulative_filter)
132+
.exclude(pk__in=existing_objs_ids)
133+
.order_by(Case(*obj_when_list))
139134
)
140135
history_manager.bulk_history_create(
141-
obj_list,
136+
objs_with_id,
142137
batch_size=batch_size,
143138
default_user=default_user,
144139
default_change_reason=default_change_reason,
145140
default_date=default_date,
146141
)
147-
objs_with_id = obj_list
148142
return objs_with_id
149143

150144

0 commit comments

Comments
 (0)