Skip to content

Commit ab845a2

Browse files
committed
NF - ported and refactored nipy data utilities
1 parent 2753ba1 commit ab845a2

File tree

3 files changed

+732
-0
lines changed

3 files changed

+732
-0
lines changed

nibabel/data.py

Lines changed: 354 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,354 @@
1+
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
# vi: set ft=python sts=4 ts=4 sw=4 et:
3+
"""
4+
Utilities to find files from NIPY data packages
5+
6+
"""
7+
import os
8+
from os.path import join as pjoin
9+
import glob
10+
import sys
11+
import ConfigParser
12+
from distutils.version import LooseVersion
13+
14+
from .environment import get_nipy_user_dir, get_nipy_system_dir
15+
16+
17+
# Fallback hint used in error messages when a package definition does not
# provide its own 'install hint' entry
DEFAULT_INSTALL_HINT = ('If you have the package, '
                        'have you set the path to the package correctly?')
19+
20+
21+
class DataError(OSError):
    ''' Error raised when a data package or its configuration is unusable '''
23+
24+
25+
class Datasource(object):
    ''' Simple class to add base path to relative path '''

    def __init__(self, base_path):
        ''' Initialize datasource

        Parameters
        ----------
        base_path : str
            path to prepend to all relative paths

        Examples
        --------
        >>> from os.path import join as pjoin
        >>> repo = Datasource(pjoin('a', 'path'))
        >>> fname = repo.get_filename('somedir', 'afile.txt')
        >>> fname == pjoin('a', 'path', 'somedir', 'afile.txt')
        True
        '''
        self.base_path = base_path

    def get_filename(self, *path_parts):
        ''' Prepend base path to `*path_parts`

        We make no check whether the returned path exists.

        Parameters
        ----------
        *path_parts : sequence of strings

        Returns
        -------
        fname : str
            result of ``os.path.join(*path_parts)``, with
            ``self.base_path`` prepended
        '''
        return pjoin(self.base_path, *path_parts)

    def list_files(self, relative=True):
        ''' Recursively list the files in the data source directory.

        Parameters
        ----------
        relative : bool, optional
            If True, the paths returned are relative to the base path of
            the data source.  If False, they start with the base path.

        Returns
        -------
        file_list : list of strings
            List of the paths of all the files in the data source.
        '''
        # Joining with an empty last part yields the base path ending in
        # exactly the separator ``os.walk`` puts before sub-directory names,
        # whether or not ``base_path`` already ends in a separator.  Slicing
        # by ``len(base_path) + 1`` instead would eat the first character of
        # each sub-directory when ``base_path`` has a trailing separator.
        prefix_len = len(pjoin(self.base_path, ''))
        out_list = []
        for base, dirs, files in os.walk(self.base_path):
            if relative:
                # Gives '' for the top directory itself
                base = base[prefix_len:]
            out_list.extend(pjoin(base, filename) for filename in files)
        return out_list
85+
86+
87+
class VersionedDatasource(Datasource):
    ''' Datasource with version information in config file '''

    def __init__(self, base_path, config_filename=None):
        ''' Initialize versioned datasource

        We assume that there is a configuration file with version
        information in datasource directory tree.

        The configuration file contains an entry like::

           [DEFAULT]
           version = 0.3

        The version should have at least a major and a minor version
        number in the form above.

        Parameters
        ----------
        base_path : str
            path to prepend to all relative paths
        config_filename : None or str
            relative path to configuration file containing version.  If
            None, use ``config.ini``

        Raises
        ------
        DataError
            if the configuration file cannot be read, if it has no
            ``version`` entry, or if the version does not start with
            integer major and minor numbers separated by a dot
        '''
        Datasource.__init__(self, base_path)
        if config_filename is None:
            config_filename = 'config.ini'
        self.config = ConfigParser.SafeConfigParser()
        cfg_file = self.get_filename(config_filename)
        # ``read`` returns the list of files successfully parsed; an empty
        # list means the file was missing or unreadable
        readfiles = self.config.read(cfg_file)
        if not readfiles:
            raise DataError('Could not read config file %s' % cfg_file)
        try:
            self.version = self.config.get('DEFAULT', 'version')
        except ConfigParser.Error:
            raise DataError('Could not get version from %s' % cfg_file)
        version_parts = self.version.split('.')
        # Report a malformed version as a DataError rather than leaking
        # IndexError / ValueError, so callers that catch DataError (such as
        # ``datasource_or_bomber``) can handle a bad package gracefully
        try:
            self.major_version = int(version_parts[0])
            self.minor_version = int(version_parts[1])
        except (IndexError, ValueError):
            raise DataError('Version "%s" from %s is not of form '
                            '"<major>.<minor>"' % (self.version, cfg_file))
        # NOTE: going through float drops trailing zeros of the minor number
        # (0.10 becomes 0.1); use ``self.version`` for ordering comparisons
        self.version_no = float('%d.%d' % (self.major_version,
                                           self.minor_version))
130+
131+
132+
def _cfg_value(fname, section='DATA', value='path'):
133+
""" Utility function to fetch value from config file """
134+
configp = ConfigParser.ConfigParser()
135+
readfiles = configp.read(fname)
136+
if not readfiles:
137+
return ''
138+
try:
139+
return configp.get(section, value)
140+
except ConfigParser.Error:
141+
return ''
142+
143+
144+
def get_data_path():
    ''' Return specified or guessed locations of NIPY data files

    Paths are collected, in this order, from:

    #. The contents of environment variable ``NIPY_DATA_PATH``, split on
       ``os.path.pathsep``
    #. Any section = ``DATA``, key = ``path`` value in a ``config.ini``
       file in your nipy user directory (found with
       ``get_nipy_user_dir()``)
    #. Any section = ``DATA``, key = ``path`` value in any files found
       with a ``sorted(glob.glob(os.path.join(sys_dir, '*.ini')))``
       search, where ``sys_dir`` is found with ``get_nipy_system_dir()``
    #. ``os.path.join(sys.prefix, 'share', 'nipy')``
    #. If ``sys.prefix`` is ``/usr``, we add
       ``/usr/local/share/nipy``. We need this because Python 2.6 in
       Debian / Ubuntu does default installs to ``/usr/local``.
    #. The result of ``get_nipy_user_dir()``

    Therefore, any paths found in ``NIPY_DATA_PATH`` will be searched
    before paths found in the user directory ``config.ini``

    Parameters
    ----------
    None

    Returns
    -------
    paths : sequence of paths

    Examples
    --------
    >>> pth = get_data_path()

    Notes
    -----
    We have to add ``/usr/local/share/nipy`` if sys.prefix is ``/usr``,
    because Debian has patched distutils in Python 2.6 to do default
    distutils installs there:

    * http://www.debian.org/doc/packaging-manuals/python-policy/ap-packaging_tools.html#s-distutils
    * http://www.mail-archive.com/debian-python@lists.debian.org/msg05084.html
    '''
    paths = []
    # An unset and an empty environment variable both contribute nothing
    env_value = os.environ.get('NIPY_DATA_PATH')
    if env_value:
        paths = env_value.split(os.path.pathsep)
    # User configuration is consulted before the system-wide ini files
    candidate_cfgs = [pjoin(get_nipy_user_dir(), 'config.ini')]
    candidate_cfgs.extend(
        sorted(glob.glob(pjoin(get_nipy_system_dir(), '*.ini'))))
    for cfg_fname in candidate_cfgs:
        cfg_paths = _cfg_value(cfg_fname)
        if cfg_paths:
            paths.extend(cfg_paths.split(os.path.pathsep))
    paths.append(pjoin(sys.prefix, 'share', 'nipy'))
    if sys.prefix == '/usr':
        paths.append(pjoin('/usr/local', 'share', 'nipy'))
    paths.append(pjoin(get_nipy_user_dir()))
    return paths
206+
207+
208+
def find_data_dir(root_dirs, *names):
    ''' Find relative path given path prefixes to search

    Each entry of `root_dirs` is tried in order; the first one under which
    ``os.path.join(*names)`` is an existing directory wins.

    Parameters
    ----------
    root_dirs : sequence of strings
        sequence of paths in which to search for data directory
    *names : sequence of strings
        sequence of strings naming directory to find. The name to search
        for is given by ``os.path.join(*names)``

    Returns
    -------
    data_dir : str
        full path (root path added to `*names` above)

    Raises
    ------
    DataError
        if the relative path is not a directory under any of `root_dirs`
    '''
    ds_relative = pjoin(*names)
    # Lazy, so no candidate is built beyond the first match
    candidates = (pjoin(root, ds_relative) for root in root_dirs)
    for candidate in candidates:
        if os.path.isdir(candidate):
            return candidate
    searched = os.path.pathsep.join(root_dirs)
    raise DataError('Could not find datasource "%s" in data path "%s"' %
                    (ds_relative, searched))
235+
236+
237+
def make_datasource(pkg_def, **kwargs):
    ''' Return datasource defined by `pkg_def` as found in `data_path`

    `data_path` is the only allowed keyword argument.

    `pkg_def` is a dictionary with at least one key - 'name'.  'name' is a
    string which may contain hyphens e.g. ``nipy-templates``.

    The relative path to the data is found with::

        names = pkg_def['name'].split('-')
        rel_path = os.path.join(*names)

    We search for this relative path in the list of paths given by `data_path`.
    By default `data_path` is given by ``get_data_path()`` in this module.

    If we can't find the relative path, raise a DataError

    Parameters
    ----------
    pkg_def : dict
        dict containing at least the key 'name'.  If the name contains hyphens
        these are taken as directory separators, so we find the relative path
        to the data with ``rel_pth = pkg_def['name'].replace('-', os.path.sep)``.
        `pkg_def` can also contain a key 'install hint' that we use in the
        returned error message from trying to use the resulting datasource
    data_path : sequence of strings or None, optional
        sequence of paths in which to search for data.  If None (the
        default), then use ``get_data_path()``

    Returns
    -------
    datasource : ``VersionedDatasource``
        An initialized ``VersionedDatasource`` instance

    Raises
    ------
    ValueError
        if any keyword argument other than `data_path` is passed
    DataError
        if the package directory is not found in `data_path`
    '''
    # Compare every key; testing the keys' own truthiness would let an
    # empty-string keyword slip through
    if any(key != 'data_path' for key in kwargs):
        raise ValueError('Unexpected keyword argument(s)')
    data_path = kwargs.get('data_path')
    if data_path is None:
        data_path = get_data_path()
    name = pkg_def['name']
    names = name.split('-')
    try:
        pth = find_data_dir(data_path, *names)
    except DataError as exception:
        # Re-raise with extra advice about the (probably missing) package
        pkg_hint = pkg_def.get('install hint', DEFAULT_INSTALL_HINT)
        msg = ('%(exc)s;\n'
               'Is it possible you have not installed a data package?\n'
               'From the names, maybe you need data package "%(name)s"?\n'
               '\n'
               '%(pkg_hint)s' % dict(exc=exception,
                                     name=name,
                                     pkg_hint=pkg_hint))
        raise DataError(msg)
    return VersionedDatasource(pth)
294+
295+
296+
class Bomber(object):
    ''' Placeholder object that raises an informative error when used '''

    def __init__(self, name, msg):
        ''' Initialize placeholder for missing data

        Parameters
        ----------
        name : str
            name of the data this object stands in for; quoted in the error
        msg : object
            explanation appended to the error; formatted with ``%s``
        '''
        self.name = name
        self.msg = msg

    def __getattr__(self, attr_name):
        ''' Raise DataError for any attribute not found by normal lookup '''
        # ``name`` and ``msg`` are instance attributes, so reading them here
        # does not re-enter ``__getattr__``
        template = ('Trying to access attribute "%s" '
                    'of non-existent data "%s"\n\n%s\n')
        raise DataError(template % (attr_name, self.name, self.msg))
308+
309+
310+
def datasource_or_bomber(pkg_def, **options):
    ''' Return a viable datasource or a Bomber

    This is to allow module level creation of datasource objects.  We
    create the objects, so that, if the data exist, and are the correct
    version, the objects are valid datasources, otherwise, they
    raise an error on access, warning about the lack of data or the
    version numbers.

    The parameters are as for ``make_datasource`` in this module.

    Parameters
    ----------
    pkg_def : dict
        dict containing at least key 'name'.  Can optionally have key 'install
        hint' (for helpful error messages) and 'min version' giving the
        minimum necessary version string for the package.
    data_path : sequence of strings or None, optional
        passed through to ``make_datasource``

    Returns
    -------
    ds : datasource or ``Bomber`` instance
        the datasource returned by ``make_datasource`` if it was found and
        satisfies 'min version'; otherwise a ``Bomber`` carrying the reason
    '''
    name = pkg_def['name']
    version = pkg_def.get('min version')
    pkg_hint = pkg_def.get('install hint', DEFAULT_INSTALL_HINT)
    names = name.split('-')
    rel_path = os.path.sep.join(names)
    try:
        ds = make_datasource(pkg_def, **options)
    except DataError as exception:
        # Defer the failure until somebody actually uses the data
        return Bomber(rel_path, exception)
    # check version
    if (version is None or
            LooseVersion(ds.version) >= LooseVersion(version)):
        return ds
    msg = ('%(name)s is version %(pkg_version)s but we need '
           'version >= %(req_version)s\n\n%(pkg_hint)s' %
           dict(name=name,
                pkg_version=ds.version,
                req_version=version,
                pkg_hint=pkg_hint))
    return Bomber(rel_path, DataError(msg))
354+

0 commit comments

Comments
 (0)