Skip to content

Commit ad523eb

Browse files
author
Ross Mechanic
authored
Fix out of memory exception in populate_history management command (#408)
* Fix out-of-memory exceptions while generating historical models with management command
* Refactored bulk_history_create to be on history manager
* Added tests for bulk_history_create
* Fixed linting errors
* Added tests to command
* Added to AUTHORS.rst and CHANGES.rst files
* Use _default_manager when querying models
* Updated gh number
1 parent 596b75c commit ad523eb

File tree

9 files changed

+238
-78
lines changed

9 files changed

+238
-78
lines changed

AUTHORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Authors
3434
- Klaas van Schelven
3535
- Kris Neuharth
3636
- Maciej "RooTer" Urbański
37+
- Mark Davidoff
3738
- Martin Bachwerk
3839
- Marty Alchin
3940
- Mauricio de Abreu Antunes

CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
Changes
22
=======
33

4+
Unreleased
5+
----------
6+
- Fixed out-of-memory exception when running populate_history management command (gh-408)
7+
48
2.1.0 (2018-06-04)
59
------------------
610
- Add ability to specify custom history_reason field (gh-379)

simple_history/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,8 @@
66
class MultipleRegistrationsError(Exception):
77
"""The model has been registered to have history tracking more than once"""
88
pass
9+
10+
11+
class NotHistoricalModelError(TypeError):
12+
"""No related history model found."""
13+
pass

simple_history/management/commands/_populate_utils.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

simple_history/management/commands/populate_history.py

Lines changed: 65 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
import django
12
from django.apps import apps
23
from django.core.management.base import BaseCommand, CommandError
34

4-
from . import _populate_utils as utils
5-
from ... import models
5+
from ... import models, utils
6+
from ...exceptions import NotHistoricalModelError
67

78
get_model = apps.get_model
89

@@ -39,10 +40,12 @@ def add_arguments(self, parser):
3940
default=200,
4041
type=int,
4142
help='Set a custom batch size when bulk inserting historical '
42-
'records.',
43+
'records.'
4344
)
4445

4546
def handle(self, *args, **options):
47+
self.verbosity = options['verbosity']
48+
4649
to_process = set()
4750
model_strings = options.get('models', []) or args
4851

@@ -54,14 +57,16 @@ def handle(self, *args, **options):
5457
for model in models.registered_models.values():
5558
try: # avoid issues with multi-table inheritance
5659
history_model = utils.get_history_model_for_model(model)
57-
except utils.NotHistorical:
60+
except NotHistoricalModelError:
5861
continue
5962
to_process.add((model, history_model))
6063
if not to_process:
61-
self.stdout.write(self.NO_REGISTERED_MODELS)
64+
if self.verbosity >= 1:
65+
self.stdout.write(self.NO_REGISTERED_MODELS)
6266

6367
else:
64-
self.stdout.write(self.COMMAND_HINT)
68+
if self.verbosity >= 1:
69+
self.stdout.write(self.COMMAND_HINT)
6570

6671
self._process(to_process, batch_size=options['batchsize'])
6772

@@ -94,11 +99,55 @@ def _model_from_natural_key(self, natural_key):
9499
" < {model} >\n".format(model=natural_key))
95100
try:
96101
history_model = utils.get_history_model_for_model(model)
97-
except utils.NotHistorical:
102+
except NotHistoricalModelError:
98103
raise ValueError(self.MODEL_NOT_HISTORICAL +
99104
" < {model} >\n".format(model=natural_key))
100105
return model, history_model
101106

107+
def _bulk_history_create(self, model, batch_size):
108+
"""Save a copy of all instances to the historical model.
109+
110+
:param model: Model you want to bulk create
111+
:param batch_size: number of models to create at once.
112+
:return:
113+
"""
114+
115+
instances = []
116+
history = utils.get_history_manager_for_model(model)
117+
if self.verbosity >= 1:
118+
self.stdout.write(
119+
"Starting bulk creating history models for {} instances {}-{}"
120+
.format(model, 0, batch_size)
121+
)
122+
123+
iterator_kwargs = {'chunk_size': batch_size} \
124+
if django.VERSION >= (2, 0, 0) else {}
125+
for index, instance in enumerate(model._default_manager.iterator(
126+
**iterator_kwargs
127+
)):
128+
# Can't just pass batch_size to bulk_create as this can lead to
129+
# Out of Memory Errors as we load too many models into memory after
130+
# creating them. So we only keep batch_size worth of models in
131+
# instances and clear them after we hit batch_size
132+
if index % batch_size == 0:
133+
134+
history.bulk_history_create(instances, batch_size=batch_size)
135+
136+
instances = []
137+
138+
if self.verbosity >= 1:
139+
self.stdout.write(
140+
"Finished bulk creating history models for {} "
141+
"instances {}-{}, starting next {}"
142+
.format(model, index - batch_size, index, batch_size)
143+
)
144+
145+
instances.append(instance)
146+
147+
# create any we didn't get in the last loop
148+
if instances:
149+
history.bulk_history_create(instances, batch_size=batch_size)
150+
102151
def _process(self, to_process, batch_size):
103152
for model, history_model in to_process:
104153
if history_model.objects.count():
@@ -107,6 +156,12 @@ def _process(self, to_process, batch_size):
107156
model=model,
108157
))
109158
continue
110-
self.stdout.write(self.START_SAVING_FOR_MODEL.format(model=model))
111-
utils.bulk_history_create(model, history_model, batch_size)
112-
self.stdout.write(self.DONE_SAVING_FOR_MODEL.format(model=model))
159+
if self.verbosity >= 1:
160+
self.stdout.write(self.START_SAVING_FOR_MODEL.format(
161+
model=model
162+
))
163+
self._bulk_history_create(model, batch_size)
164+
if self.verbosity >= 1:
165+
self.stdout.write(self.DONE_SAVING_FOR_MODEL.format(
166+
model=model
167+
))

simple_history/manager.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import unicode_literals
22

33
from django.db import models
4+
from django.utils.timezone import now
45

56

67
class HistoryDescriptor(object):
@@ -88,3 +89,19 @@ def _as_of_set(self, date):
8889
history_type='-').exists():
8990
continue
9091
yield last_change.instance
92+
93+
def bulk_history_create(self, objs, batch_size=None):
94+
"""Bulk create the history for the objects specified by objs"""
95+
96+
historical_instances = [
97+
self.model(
98+
history_date=getattr(instance, '_history_date', now()),
99+
history_user=getattr(instance, '_history_user', None),
100+
**{
101+
field.attname: getattr(instance, field.attname)
102+
for field in instance._meta.fields
103+
}
104+
) for instance in objs]
105+
106+
return self.model.objects.bulk_create(historical_instances,
107+
batch_size=batch_size)

simple_history/tests/tests/test_commands.py

Lines changed: 60 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
from contextlib import contextmanager
22
from datetime import datetime
33

4-
from six.moves import cStringIO as StringIO
5-
from django.test import TestCase
64
from django.core import management
5+
from django.test import TestCase
6+
from six.moves import cStringIO as StringIO
77

88
from simple_history import models as sh_models
99
from simple_history.management.commands import populate_history
10-
11-
from .. import models
10+
from ..models import Book, Poll, Restaurant
1211

1312

1413
@contextmanager
@@ -47,60 +46,60 @@ def test_bad_args(self):
4746
self.assertIn(msg, out.getvalue())
4847

4948
def test_auto_populate(self):
50-
models.Poll.objects.create(question="Will this populate?",
51-
pub_date=datetime.now())
52-
models.Poll.history.all().delete()
49+
Poll.objects.create(question="Will this populate?",
50+
pub_date=datetime.now())
51+
Poll.history.all().delete()
5352
management.call_command(self.command_name, auto=True,
5453
stdout=StringIO(), stderr=StringIO())
55-
self.assertEqual(models.Poll.history.all().count(), 1)
54+
self.assertEqual(Poll.history.all().count(), 1)
5655

5756
def test_populate_with_custom_batch_size(self):
58-
models.Poll.objects.create(question="Will this populate?",
59-
pub_date=datetime.now())
60-
models.Poll.history.all().delete()
57+
Poll.objects.create(question="Will this populate?",
58+
pub_date=datetime.now())
59+
Poll.history.all().delete()
6160
management.call_command(self.command_name, auto=True, batchsize=500,
6261
stdout=StringIO(), stderr=StringIO())
63-
self.assertEqual(models.Poll.history.all().count(), 1)
62+
self.assertEqual(Poll.history.all().count(), 1)
6463

6564
def test_specific_populate(self):
66-
models.Poll.objects.create(question="Will this populate?",
67-
pub_date=datetime.now())
68-
models.Poll.history.all().delete()
69-
models.Book.objects.create(isbn="9780007117116")
70-
models.Book.history.all().delete()
65+
Poll.objects.create(question="Will this populate?",
66+
pub_date=datetime.now())
67+
Poll.history.all().delete()
68+
Book.objects.create(isbn="9780007117116")
69+
Book.history.all().delete()
7170
management.call_command(self.command_name, "tests.book",
7271
stdout=StringIO(), stderr=StringIO())
73-
self.assertEqual(models.Book.history.all().count(), 1)
74-
self.assertEqual(models.Poll.history.all().count(), 0)
72+
self.assertEqual(Book.history.all().count(), 1)
73+
self.assertEqual(Poll.history.all().count(), 0)
7574

7675
def test_failing_wont_save(self):
77-
models.Poll.objects.create(question="Will this populate?",
78-
pub_date=datetime.now())
79-
models.Poll.history.all().delete()
76+
Poll.objects.create(question="Will this populate?",
77+
pub_date=datetime.now())
78+
Poll.history.all().delete()
8079
self.assertRaises(self.command_error,
8180
management.call_command, self.command_name,
8281
"tests.poll", "tests.invalid_model",
8382
stdout=StringIO(), stderr=StringIO())
84-
self.assertEqual(models.Poll.history.all().count(), 0)
83+
self.assertEqual(Poll.history.all().count(), 0)
8584

8685
def test_multi_table(self):
8786
data = {'rating': 5, 'name': "Tea 'N More"}
88-
models.Restaurant.objects.create(**data)
89-
models.Restaurant.updates.all().delete()
87+
Restaurant.objects.create(**data)
88+
Restaurant.updates.all().delete()
9089
management.call_command(self.command_name, 'tests.restaurant',
9190
stdout=StringIO(), stderr=StringIO())
92-
update_record = models.Restaurant.updates.all()[0]
91+
update_record = Restaurant.updates.all()[0]
9392
for attr, value in data.items():
9493
self.assertEqual(getattr(update_record, attr), value)
9594

9695
def test_existing_objects(self):
9796
data = {'rating': 5, 'name': "Tea 'N More"}
9897
out = StringIO()
99-
models.Restaurant.objects.create(**data)
100-
pre_call_count = models.Restaurant.updates.count()
98+
Restaurant.objects.create(**data)
99+
pre_call_count = Restaurant.updates.count()
101100
management.call_command(self.command_name, 'tests.restaurant',
102101
stdout=StringIO(), stderr=out)
103-
self.assertEqual(models.Restaurant.updates.count(), pre_call_count)
102+
self.assertEqual(Restaurant.updates.count(), pre_call_count)
104103
self.assertIn(populate_history.Command.EXISTING_HISTORY_FOUND,
105104
out.getvalue())
106105

@@ -111,3 +110,35 @@ def test_no_historical(self):
111110
stdout=out)
112111
self.assertIn(populate_history.Command.NO_REGISTERED_MODELS,
113112
out.getvalue())
113+
114+
def test_batch_processing_with_batch_size_less_than_total(self):
115+
data = [
116+
Poll(id=1, question='Question 1', pub_date=datetime.now()),
117+
Poll(id=2, question='Question 2', pub_date=datetime.now()),
118+
Poll(id=3, question='Question 3', pub_date=datetime.now()),
119+
Poll(id=4, question='Question 4', pub_date=datetime.now()),
120+
]
121+
Poll.objects.bulk_create(data)
122+
123+
management.call_command(self.command_name, auto=True, batchsize=3,
124+
stdout=StringIO(), stderr=StringIO())
125+
126+
self.assertEqual(Poll.history.count(), 4)
127+
128+
def test_stdout_not_printed_when_verbosity_is_0(self):
129+
out = StringIO()
130+
Poll.objects.create(question='Question 1', pub_date=datetime.now())
131+
132+
management.call_command(self.command_name, auto=True, batchsize=3,
133+
stdout=out, stderr=StringIO(), verbosity=0)
134+
135+
self.assertEqual(out.getvalue(), '')
136+
137+
def test_stdout_printed_when_verbosity_is_not_specified(self):
138+
out = StringIO()
139+
Poll.objects.create(question='Question 1', pub_date=datetime.now())
140+
141+
management.call_command(self.command_name, auto=True, batchsize=3,
142+
stdout=out, stderr=StringIO())
143+
144+
self.assertNotEqual(out.getvalue(), '')

0 commit comments

Comments
 (0)