Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
69c1835
Add basic implementation of mimir logger
dmitriy-serdyuk Mar 1, 2016
584bd85
Add requirements and travis tests
dmitriy-serdyuk Mar 1, 2016
5053f53
Fix unpickling of mimir logger
dmitriy-serdyuk Mar 1, 2016
0a2945a
Add cython to requerements
dmitriy-serdyuk Mar 4, 2016
46f27ed
Add other requirements
dmitriy-serdyuk Mar 1, 2016
739bea6
Fix typo, update a logger dictionary and add documentation
dmitriy-serdyuk Mar 1, 2016
b9033bb
Inherit from the right class
dmitriy-serdyuk Mar 2, 2016
d95fb6a
Add requirements for travis
dmitriy-serdyuk Mar 2, 2016
f00ed6d
Remove conflicting packages
dmitriy-serdyuk Mar 2, 2016
805eb95
Fix flushing
dmitriy-serdyuk Mar 2, 2016
d225a6f
Serialize 1d numpy arrays in readable format
dmitriy-serdyuk Mar 2, 2016
eda6785
Move flushing functional to the JSONLog class
dmitriy-serdyuk Mar 2, 2016
3e5a667
Load log when unpickling
dmitriy-serdyuk Mar 2, 2016
a62b0be
Rename JSON log and do not print to stdout
dmitriy-serdyuk Mar 2, 2016
e9996b7
Make arguments to mimir log overridable
dmitriy-serdyuk Mar 2, 2016
a4b8d1f
Make log a context manager
dmitriy-serdyuk Mar 2, 2016
39b6573
Reopen json logger when enter context
dmitriy-serdyuk Mar 2, 2016
fc1b30b
Remove hacky 1d array serialization (fixed in mimir)
dmitriy-serdyuk Mar 3, 2016
d8aa85f
Do not open the logger in constructor
dmitriy-serdyuk Mar 3, 2016
ac436a4
Refactor opening
dmitriy-serdyuk Mar 3, 2016
0aa18d6
Fix error message
dmitriy-serdyuk Mar 3, 2016
2fdb479
Refactor json log
dmitriy-serdyuk Mar 3, 2016
bcdbb70
Test should be run with float64
dmitriy-serdyuk Mar 3, 2016
4e397d1
Try to remove Cython from Travis requieremnts
dmitriy-serdyuk Mar 3, 2016
9e9932e
Remove dependencies of mimir from blocks ones
dmitriy-serdyuk Mar 3, 2016
0c56bb3
Allow passing arguments to log objects
dmitriy-serdyuk Mar 3, 2016
78c7058
Add documentation
dmitriy-serdyuk Mar 3, 2016
b593da8
Fix blocksrc in travis
dmitriy-serdyuk Mar 4, 2016
05ba5e9
Use bigger maxlen by default
dmitriy-serdyuk Mar 4, 2016
4ebe84b
Move default arguments to signature of json logger
dmitriy-serdyuk Mar 4, 2016
a6a70c2
Fix extracting rows from the log
dmitriy-serdyuk Mar 8, 2016
95fe8b1
Adapt documentation
dmitriy-serdyuk Mar 8, 2016
b5fd90d
Add more examples for jq
dmitriy-serdyuk Mar 8, 2016
cc69f50
Remove json log file if already exists
dmitriy-serdyuk Mar 8, 2016
0e57c8d
Fix json log length computation
dmitriy-serdyuk Mar 8, 2016
5674e30
Try to fix this indexing hell
dmitriy-serdyuk Mar 8, 2016
558b32b
Refactor local caching code
dmitriy-serdyuk Apr 20, 2016
126be43
Make default maxlen bigger
dmitriy-serdyuk Apr 20, 2016
95ffa5a
Update local cache with as meny sequences as needed
dmitriy-serdyuk Apr 20, 2016
99ae6c8
Fix iteration number
dmitriy-serdyuk Apr 20, 2016
3e240e8
Add forgotten + 1
dmitriy-serdyuk Apr 21, 2016
1438756
Fix cache extend
dmitriy-serdyuk Apr 24, 2016
047a3b3
Fix length
dmitriy-serdyuk Apr 24, 2016
27e2d7d
Do not use iterations done in json log
dmitriy-serdyuk Apr 24, 2016
d08132c
Close log properly and check length only for default log
dmitriy-serdyuk Apr 25, 2016
e8e14ce
Wrap test
dmitriy-serdyuk Apr 25, 2016
28233fe
Open and close log artificially
dmitriy-serdyuk Apr 25, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,18 @@ matrix:
env: TESTS=blocks FLOATX=float32
- python: 2.7
env: TESTS=blocks FLOATX=float32 DB=sqlite
- python: 2.7
env: TESTS=blocks FLOATX=float32 DB=mimir
- python: 3.4
env: TESTS=blocks FLOATX=float64
- python: 2.7
env: TESTS=blocks FLOATX=float64 DB=mimir
- python: 2.7
env: TESTS=blocks-examples FLOATX=float32
- python: 2.7
env: TESTS=blocks-examples FLOATX=float32 DB=sqlite
- python: 2.7
env: TESTS=blocks-examples FLOATX=float32 DB=mimir
- python: 3.4
env: TESTS=blocks-examples FLOATX=float64
before_install:
Expand All @@ -40,6 +46,10 @@ script:
- export THEANO_FLAGS=floatX=$FLOATX,optimizer=fast_compile
- export FUEL_FLOATX=$FLOATX
- "if [[ $DB == 'sqlite' ]]; then echo 'log_backend: sqlite' > ~/.blocksrc; fi"
- |
if [[ $DB == 'mimir' ]]; then
echo 'log_backend: mimir' > ~/.blocksrc;
fi
- # Running nose2 within coverage makes imports count towards coverage
- function fail { export FAILED=1; }
- |
Expand Down
10 changes: 8 additions & 2 deletions blocks/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,13 @@
.. option:: log_backend

The backend to use for logging experiments. Defaults to `python`, which
stores the log as a Python object in memory. The other option is
`sqlite`.
stores the log as a Python object in memory. The other options are
`sqlite` and `mimir`.

.. option:: log_arguments

The arguments to pass to the logger. Defaults to an empty dictionary.
This option can be used to configure :class:`~.log.json.JSONLinesLog`.

.. option:: sqlite_database, BLOCKS_SQLITEDB

Expand Down Expand Up @@ -179,6 +184,7 @@ def str_or_none(val):
config.add_config('profile', type_=bool_, default=False,
env_var='BLOCKS_PROFILE')
config.add_config('log_backend', type_=str, default='python')
config.add_config('log_arguments', type_=dict, default={})
config.add_config('sqlite_database', type_=str,
default=os.path.expanduser('~/blocks_log.sqlite'),
env_var='BLOCKS_SQLITEDB')
Expand Down
4 changes: 3 additions & 1 deletion blocks/log/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from .log import TrainingLog
from .sqlite import SQLiteLog
from .json import JSONLinesLog

BACKENDS = {
'python': TrainingLog,
'sqlite': SQLiteLog
'sqlite': SQLiteLog,
'mimir': JSONLinesLog
}
133 changes: 133 additions & 0 deletions blocks/log/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import os.path
from collections import deque
from six.moves import range
from mimir import Logger
from mimir.logger import _Logger

from .log import TrainingLogBase


class PicklableLogger(_Logger):
    """Lazily opened, picklable variant of :class:`mimir.Logger`.

    Only the keyword arguments passed to the constructor are kept (and
    pickled). The real mimir logger is built from them in :meth:`open`,
    and its attributes are copied onto this instance.

    Parameters
    ----------
    kwargs
        Keyword arguments forwarded verbatim to :class:`mimir.Logger`
        when the logger is opened. :meth:`open` reads the ``filename``
        entry to reload the existing log.

    """
    def __init__(self, **kwargs):
        # The parent constructor is not invoked here; the instance state
        # is taken from a freshly built Logger in open().
        self.opened = False
        self.logger_kwargs = kwargs

    def open(self):
        """Build the underlying logger and load the log file.

        Does nothing when the logger is already marked as opened.

        """
        if self.opened:
            return
        backend = Logger(**self.logger_kwargs)
        # Adopt every attribute of the freshly created mimir logger.
        self.__dict__.update(backend.__dict__)
        self.load(self.logger_kwargs['filename'])
        self.opened = True

    def close(self):
        """Mark the logger as closed, then run the parent ``close``."""
        self.opened = False
        super(PicklableLogger, self).close()

    def __getstate__(self):
        # The constructor keyword arguments are the whole pickled state.
        return self.logger_kwargs

    def __setstate__(self, state):
        # Restore the keyword arguments and reopen the logger right away.
        self.opened = False
        self.logger_kwargs = state
        self.open()


class JSONLinesLog(TrainingLogBase):
    """A log stored in gzipped JSON Lines format.

    Each line of the log is a dictionary of the form
    ``{"iterations_done": <iteration>,
    "reports": {<record_name>: <record_value>, ...}}``.

    Rows are first collected in an in-memory cache and handed over to the
    underlying logger by :meth:`flush`.

    Parameters
    ----------
    filename : str
        Path of the log file. An existing file at this path is removed
        when the log is constructed.
    maxlen : int
        Passed to the underlying logger as ``maxlen``; it is reported in
        the error raised when a requested row cannot be read back.
    formatter
        Passed to the underlying logger as ``formatter``.
    kwargs
        Any further keyword arguments for the underlying logger.

    Examples
    --------

    Analysis of the log can be easily done with
    `jq <https://stedolan.github.io/jq/>`__

    .. code:: bash

        gunzip -c log.jsonl.gz | jq '.reports.train_error'

        # Or equivalently
        zcat log.jsonl.gz | jq '.reports.train_error'

        # To filter out null entries
        zcat log.jsonl.gz | jq '.reports.train_error | select(.>0)'

        # To extract minimal training error
        gunzip -c log.jsonl.gz | jq -s '. | map(.reports.true_cost) | min'

        # To include the iteration with minimal training error
        gunzip -c log.jsonl.gz | jq -s '. |
            map([.iterations_done, .reports.true_cost]) | min_by(.[1])'

    """
    def __init__(self, filename='log.jsonl.gz', maxlen=101, formatter=None,
                 **kwargs):
        self.status = {}
        super(JSONLinesLog, self).__init__()
        # Start from a clean file: a log left at the same path is removed.
        if os.path.isfile(filename):
            os.remove(filename)
        self.logger = PicklableLogger(
            filename=filename, maxlen=maxlen, formatter=formatter, **kwargs)
        # Rows that have not been handed to the logger yet, oldest first.
        self.local_cache = deque()

    def flush(self, iterations_done):
        """Write the oldest cached row to the underlying logger.

        Parameters
        ----------
        iterations_done : int
            Iteration number stored alongside the written row.

        Raises
        ------
        ValueError
            If `iterations_done` is negative.

        """
        if iterations_done < 0:
            raise ValueError(
                'iterations_done must be non-negative, got {}'.format(
                    iterations_done))
        if self.local_cache:
            self.logger.log({'iterations_done': iterations_done,
                             'reports': self.local_cache.popleft()})

    def __getitem__(self, time):
        """Return the row of reports for iteration `time`.

        All cached rows but the newest one are flushed first. Rows past
        the current end of the log are created empty in the cache.

        Raises
        ------
        ValueError
            If the row cannot be read back from the underlying logger, or
            if the stored iteration number does not match `time`.

        """
        self._check_time(time)
        logger_len = self.inner_logger_len()
        total_length = logger_len + len(self.local_cache)

        # Flush the local cache down to a single row; each flushed row is
        # written with its own position in the log as iteration number.
        while len(self.local_cache) > 1:
            self.flush(total_length - len(self.local_cache))
        logger_len = self.inner_logger_len()

        if time >= total_length:
            # Need to create new (empty) rows in the local cache
            self.local_cache.extend(
                [{} for _ in range(time - total_length + 1)])
        if time < logger_len:
            try:
                entry = self.logger[time]
            except IndexError:
                raise ValueError(
                    'cannot get past log entries for JSON log, max log length '
                    'in memory is: {}'.format(
                        self.logger.logger_kwargs['maxlen']))
            if entry['iterations_done'] != time:
                raise ValueError('iterations done')
            return entry['reports']
        # Otherwise the row still lives in the local cache.
        return self.local_cache[time - logger_len]

    def inner_logger_len(self):
        """Return the length of the underlying logger.

        Returns 0 when taking the length raises ``AttributeError``
        (presumably because the logger has not been opened yet -- TODO
        confirm against mimir).

        """
        try:
            return len(self.logger)
        except AttributeError:
            return 0

    def __len__(self):
        return self.inner_logger_len() + len(self.local_cache)

    def __setitem__(self, time, value):
        raise ValueError('cannot manually change JSON Lines log')

    def __enter__(self):
        """Open the underlying logger and return the log itself.

        Returning ``self`` keeps ``with log as l:`` usable, in line with
        the base class context manager.

        """
        self.logger.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Flush one pending row and close the underlying logger.

        The logger is closed even when flushing raises; exceptions are
        not suppressed.

        """
        try:
            self.flush(self.status.get('iterations_done', -1))
        finally:
            self.logger.close()

    def __iter__(self):
        return iter([self[i] for i in range(len(self))])
6 changes: 6 additions & 0 deletions blocks/log/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def __init__(self, uuid=None):
'resumed_from': None
})

def __enter__(self):
    """Enter the log's context and return the log itself."""
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the log's context.

    This implementation does nothing and returns ``None``, so an
    exception raised inside the ``with`` block is not suppressed.

    """
    pass

@property
def h_uuid(self):
"""Return a hexadecimal version of the UUID bytes.
Expand Down
4 changes: 2 additions & 2 deletions blocks/main_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def __init__(self, algorithm, data_stream, model=None, log=None,
if log is None:
if log_backend is None:
log_backend = config.log_backend
log = BACKENDS[log_backend]()
log = BACKENDS[log_backend](**config.log_arguments)
if extensions is None:
extensions = []

Expand Down Expand Up @@ -157,7 +157,7 @@ def run(self):
set(self.algorithm.parameters)):
logger.warning("different parameters for model and algorithm")

with change_recursion_limit(config.recursion_limit):
with change_recursion_limit(config.recursion_limit), self.log:
self.original_sigint_handler = signal.signal(
signal.SIGINT, self._handle_epoch_interrupt)
self.original_sigterm_handler = signal.signal(
Expand Down
8 changes: 8 additions & 0 deletions docs/api/log.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ Dictionary backend
:undoc-members:
:show-inheritance:

JSON Lines backend
------------------

.. automodule:: blocks.log.json
:members:
:undoc-members:
:show-inheritance:

Sqlite backend
--------------

Expand Down
4 changes: 4 additions & 0 deletions req-travis-pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ git+https://github.com/Theano/Theano.git#egg=theano
--allow-external fuel
--allow-unverified fuel
git+https://github.com/mila-udem/fuel#egg=fuel

--allow-external mimir
--allow-unverified mimir
git+https://github.com/bartvm/mimir#egg=mimir
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ git+https://github.com/Theano/Theano.git#egg=theano
--allow-external fuel
--allow-unverified fuel
git+https://github.com/mila-udem/fuel#egg=fuel

--allow-external mimir
--allow-unverified mimir
git+https://github.com/bartvm/mimir#egg=mimir
8 changes: 6 additions & 2 deletions tests/extensions/test_saveload.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ def test_save_and_load(self):
algorithm=self.algorithm,
extensions=[Load('myweirdmodel.tar')]
)
new_main_loop.extensions[0].main_loop = new_main_loop
new_main_loop._run_extensions('before_training')
with new_main_loop.log:
new_main_loop.extensions[0].main_loop = new_main_loop
new_main_loop._run_extensions('before_training')
assert_allclose(self.W.get_value(), old_value)

def test_load_log_and_iteration_state(self):
Expand All @@ -68,6 +69,9 @@ def test_load_log_and_iteration_state(self):
)
new_main_loop.extensions[0].main_loop = new_main_loop
new_main_loop._run_extensions('before_training')
with new_main_loop.log:
# Open and close log
pass
# Check the log
new_keys = sorted(new_main_loop.log.status.keys())
old_keys = sorted(self.main_loop.log.status.keys())
Expand Down
55 changes: 28 additions & 27 deletions tests/extensions/test_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,33 +90,34 @@ def test_track_the_best():
extension = TrackTheBest("cost")
extension.main_loop = main_loop

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 5
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 5
assert main_loop.log.current_row['cost_best_so_far']

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 6
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 5
assert main_loop.log.current_row.get('cost_best_so_far', None) is None

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 5
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 5
assert main_loop.log.current_row.get('cost_best_so_far', None) is None

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 4
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 4
assert main_loop.log.current_row['cost_best_so_far']
with main_loop.log:
main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 5
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 5
assert main_loop.log.current_row['cost_best_so_far']

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 6
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 5
assert main_loop.log.current_row.get('cost_best_so_far', None) is None

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 5
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 5
assert main_loop.log.current_row.get('cost_best_so_far', None) is None

main_loop.status['epochs_done'] += 1
main_loop.status['iterations_done'] += 10
main_loop.log.current_row['cost'] = 4
extension.dispatch('after_epoch')
assert main_loop.status['best_cost'] == 4
assert main_loop.log.current_row['cost_best_so_far']


class WriteCostExtension(TrainingExtension):
Expand Down
Loading