Skip to content

Commit fe36d2b

Browse files
committed
Added ability to create ECOSTRESS database (closes #89)
1 parent af0d62c commit fe36d2b

File tree

2 files changed

+242
-0
lines changed

2 files changed

+242
-0
lines changed

spectral/database/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@
3333
from __future__ import division, print_function, unicode_literals
3434

3535
from .aster import AsterDatabase
36+
from .ecostress import EcostressDatabase

spectral/database/ecostress.py

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
#########################################################################
2+
#
3+
# ecostress.py - This file is part of the Spectral Python (SPy) package.
4+
#
5+
# Copyright (C) 2010 Thomas Boggs
6+
#
7+
# Spectral Python is free software; you can redistribute it and/
8+
# or modify it under the terms of the GNU General Public License
9+
# as published by the Free Software Foundation; either version 2
10+
# of the License, or (at your option) any later version.
11+
#
12+
# Spectral Python is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this software; if not, write to
19+
#
20+
# Free Software Foundation, Inc.
21+
# 59 Temple Place, Suite 330
22+
# Boston, MA 02111-1307
23+
# USA
24+
#
25+
#########################################################################
26+
#
27+
# Send comments to:
28+
# Thomas Boggs, [email protected]
29+
#
30+
'''Code for reading and managing ECOSTRESS spectral library data.'''
31+
32+
from __future__ import division, print_function, unicode_literals
33+
34+
import itertools
35+
36+
from spectral.utilities.python23 import IS_PYTHON3
37+
from .aster import AsterDatabase, Signature
38+
39+
# Text I/O helpers: ECOSTRESS files are ISO-8859-1 encoded. Under Python 3
# the decoding is done by `open`; otherwise raw lines are decoded by hand.
if IS_PYTHON3:
    def readline(fin):
        '''Returns the next line read from the open file object `fin`.'''
        return fin.readline()

    def open_file(filename):
        '''Opens `filename` for reading as ISO-8859-1 encoded text.'''
        return open(filename, encoding='iso-8859-1')
else:
    def readline(fin):
        '''Returns the next line read from `fin`, decoded from ISO-8859-1.'''
        return fin.readline().decode('iso-8859-1')

    def open_file(filename):
        '''Opens `filename` for reading using the default mode.'''
        return open(filename)
45+
46+
47+
def read_ecostress_file(filename):
    '''Reads an ECOSTRESS v1 spectrum file.

    Arguments:

        `filename` (str):

            Path of the ECOSTRESS spectrum text file to read.

    Returns:

        A `Signature` object. Its `sample` and `measurement` mappings are
        filled with the "key: value" metadata found in the file (keys are
        lower-cased), and its `x` and `y` attributes are lists of floats
        holding the spectrum values.

    Raises:

        `ValueError` if the file contains no valid signature values, or if
        a y value can not be converted to a float.

        `IndexError` if a metadata line has no ":" separator, or if the end
        of the file is reached before the metadata sections are terminated.

    The file is parsed in three consecutive sections: sample metadata (up to
    the first line starting with "Measurement"), measurement metadata (up to
    the first blank line), and the whitespace-separated x/y value pairs.
    '''

    # Use a context manager so the file handle is always closed.
    with open_file(filename) as fin:
        lines = fin.readlines()
    if not IS_PYTHON3:
        lines = [line.decode('iso-8859-1') for line in lines]

    def metaline_to_pair(line):
        '''Splits a "key: value" line on the first colon.'''
        return [x.strip() for x in line.split(':', 1)]

    s = Signature()

    # Read sample metadata
    for i in itertools.count():
        if lines[i].strip().startswith('Measurement'):
            break
        pair = metaline_to_pair(lines[i])
        try:
            s.sample[pair[0].lower()] = pair[1]
        except Exception:
            # Report the offending line before propagating the error.
            print('line {}: {}'.format(i, lines[i]))
            raise

    # Read measurement metadata
    for j in itertools.count(i):
        if len(lines[j].strip()) == 0:
            break
        pair = metaline_to_pair(lines[j])
        s.measurement[pair[0].lower()] = pair[1]

    # Read signature spectrum
    pairs = []
    for line in lines[j:]:
        line = line.strip()
        if len(line) == 0:
            continue
        pair = line.split()
        nItems = len(pair)

        # Try to handle invalid values on signature lines
        if nItems == 1:
            print('single item (%s) on signature line, %s' \
                  % (pair[0], filename))
            continue
        elif nItems > 2:
            print('more than 2 values on signature line,', filename)
            continue
        try:
            x = float(pair[0])
        except ValueError:
            print('corrupt signature line,', filename)
            # Skip the line: without this, `x` would be either undefined or
            # left over from the previous line in the checks below.
            continue
        if x == 0:
            # Zero wavelength values are silently dropped.
            continue
        elif x < 0:
            print('Negative wavelength value,', filename)
            continue

        pairs.append(pair)

    if not pairs:
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError('No valid signature values found in file %s.'
                         % filename)

    [x, y] = [list(v) for v in zip(*pairs)]

    s.x = [float(val) for val in x]
    s.y = [float(val) for val in y]
    # First/last x values are stored as the original strings from the file.
    s.measurement['first x value'] = x[0]
    s.measurement['last x value'] = x[-1]
    s.measurement['number of x values'] = len(x)

    return s
115+
116+
class EcostressDatabase(AsterDatabase):
    '''A relational database to manage ECOSTRESS spectral library data.'''

    @classmethod
    def create(cls, filename, data_dir=None):
        '''Creates an ECOSTRESS relational database by parsing ECOSTRESS data files.

        Arguments:

            `filename` (str):

                Name of the new sqlite database file to create.

            `data_dir` (str):

                Path to the directory containing ECOSTRESS library data files. If
                this argument is not provided, no data will be imported.

        Returns:

            An :class:`~spectral.database.EcostressDatabase` object.

        Raises:

            `Exception` if `filename` already exists, or if `data_dir` is
            given but is not a directory.

        Example::

            >>> EcostressDatabase.create("ecostress.db", "./eco_data_ver1/")

        This is a class method (it does not require instantiating an
        EcostressDatabase object) that creates a new database by parsing all of the
        files in the ECOSTRESS library data directory. Normally, this should only
        need to be called once. Subsequently, a corresponding database object
        can be created by instantiating a new EcostressDatabase object with the
        path to the database file as its argument. For example::

            >>> from spectral.database.ecostress import EcostressDatabase
            >>> db = EcostressDatabase("~/ecostress.db")
        '''
        import os
        if os.path.isfile(filename):
            raise Exception('Error: Specified file already exists.')
        db = cls()
        db._connect(filename)
        # Create the tables before importing any data.
        for schema in cls.schemas:
            db.cursor.execute(schema)
        if data_dir:
            db._import_files(data_dir)
        return db

    def read_file(self, filename):
        '''Reads a single ECOSTRESS spectrum file.

        Returns the result of calling `read_ecostress_file` on `filename`.
        '''
        return read_ecostress_file(filename)

    def _import_files(self, data_dir, ignore=None):
        '''Import each file from the ECOSTRESS library into the database.

        Arguments:

            `data_dir` (str):

                Directory searched for files matching "*spectrum.txt".

            `ignore` (list of str):

                Optional file names, relative to `data_dir`, to skip.

        Returns:

            An empty list (no signatures are accumulated; the return value
            is kept for backward compatibility).

        Raises:

            `Exception` if `data_dir` is not a directory.
        '''
        from glob import glob
        import os

        if not os.path.isdir(data_dir):
            raise Exception('Error: Invalid directory name specified.')
        if ignore is not None:
            filesToIgnore = [data_dir + '/' + f for f in ignore]
        else:
            filesToIgnore = []
        # glob may return paths spelled differently from the concatenation
        # above (e.g. when `data_dir` has a trailing slash), so membership
        # is tested on normalized paths.
        ignoredPaths = set(os.path.normpath(f) for f in filesToIgnore)

        numFiles = 0
        numIgnored = 0

        for f in glob(data_dir + '/*spectrum.txt'):
            if os.path.normpath(f) in ignoredPaths:
                numIgnored += 1
                continue
            print('Importing %s.' % f)
            numFiles += 1
            sig = self.read_file(f)
            s = sig.sample
            if 'particle size' in s:
                # `lower` must be called; comparing the bound method itself
                # to a string is always False.
                if s['particle size'].lower() == 'liquid':
                    phase = 'liquid'
                else:
                    phase = 'solid'
            else:
                phase = 'unknown'
                s['particle size'] = 'none'
            sampleNum = s.get('sample no.', '')
            subclass = s.get('subclass', 'none')
            # Fall back to the genus when no subclass is available. Equality
            # (not identity) is the correct test for string values.
            if subclass == 'none' and 'genus' in s:
                subclass = s['genus']
            sampleID = self._add_sample(s['name'], s['type'], s['class'],
                                        subclass, s['particle size'],
                                        sampleNum, s['owner'], s['origin'],
                                        phase, s['description'])

            # The instrument is the second dot-separated field of the
            # file's base name.
            instrument = os.path.basename(f).split('.')[1]
            environment = 'lab'
            m = sig.measurement

            # Correct numerous misspellings of "reflectance" and "transmittance"
            yUnit = m['y units']
            if 'reflectence' in yUnit:
                yUnit = 'reflectance (percent)'
            elif yUnit.startswith('trans'):
                yUnit = 'transmittance (percent)'
            measurement = m['measurement']
            # `startswith` also tolerates an empty measurement value.
            if measurement.startswith('t'):
                measurement = 'transmittance'
            # NOTE(review): the meaning of the -1 argument is defined by
            # `_add_signature`, which is not visible here - confirm.
            self._add_signature(sampleID, -1, instrument, environment,
                                measurement, m['x units'], yUnit,
                                m['first x value'], m['last x value'],
                                sig.x, sig.y)
        if numFiles == 0:
            print('No data files were found in directory "%s".' \
                  % data_dir)
        else:
            print('Processed %d files.' % numFiles)
        if numIgnored > 0:
            print('Ignored the following %d bad files:' % (numIgnored))
            for f in filesToIgnore:
                print('\t' + f)

        return []

0 commit comments

Comments
 (0)