Skip to content

Commit 1138a95

Browse files
committed
ENH: Add CaretSpecFile type for use with CIFTI-2
1 parent 344bfd8 commit 1138a95

File tree

1 file changed

+199
-0
lines changed

1 file changed

+199
-0
lines changed

nibabel/cifti2/caretspec.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
# vi: set ft=python sts=4 ts=4 sw=4 et:
3+
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4+
#
5+
# See COPYING file distributed along with the NiBabel package for the
6+
# copyright and license terms.
7+
#
8+
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9+
"""Read / write access to CaretSpecFile format
10+
11+
The format of CaretSpecFiles does not seem to have any independent
12+
documentation.
13+
14+
Code can be found here [0], and a DTD was worked out in this email thread [1].
15+
16+
[0]: https://github.com/Washington-University/workbench/tree/master/src/Files
17+
[1]: https://groups.google.com/a/humanconnectome.org/g/hcp-users/c/EGuwdaTVFuw/m/tg7a_-7mAQAJ
18+
"""
19+
import xml.etree.ElementTree as et
20+
21+
from .. import xmlutils as xml
22+
from ..caret import CaretMetaData
23+
24+
25+
class CaretSpecDataFile(xml.XmlSerializable):
    """A single ``DataFile`` entry of a CaretSpecFile document

    XML layout:

    * Attributes

        * Structure - A string from the BrainStructure list to identify
          what structure this element refers to (usually left cortex,
          right cortex, or cerebellum).
        * DataFileType - A string from the DataFileType list
        * Selected - A boolean

    * Child Elements: [NA]
    * Text Content: A URI
    * Parent Element - CaretSpecFile

    Attributes
    ----------
    structure : str
        Name of brain structure
    data_file_type : str
        Type of data file
    selected : bool
        Used for workbench internals
    uri : str
        URI of data file
    """

    def __init__(self, structure=None, data_file_type=None, selected=None, uri=None):
        super().__init__()
        self.structure, self.data_file_type = structure, data_file_type
        self.selected, self.uri = selected, uri

    def _to_xml_element(self):
        """Build the ``DataFile`` element: three attributes plus the URI as text

        ``structure`` and ``data_file_type`` are passed through ``str()``;
        ``selected`` is written as ``'true'`` when truthy, ``'false'`` otherwise.
        """
        element = xml.Element('DataFile')

        if self.selected:
            selected_text = 'true'
        else:
            selected_text = 'false'

        # Attributes are set in Structure, DataFileType, Selected order.
        for key, value in (
            ('Structure', str(self.structure)),
            ('DataFileType', str(self.data_file_type)),
            ('Selected', selected_text),
        ):
            element.attrib[key] = value

        element.text = self.uri
        return element

    def __repr__(self):
        """Represent the object by its decoded XML serialization"""
        serialized = self.to_xml()
        return serialized.decode()
69+
70+
71+
class CaretSpecFile(xml.XmlSerializable):
    """Class for CaretSpecFile XML documents

    These are used to identify related surfaces and volumes for use with CIFTI-2
    data files.

    Attributes
    ----------
    metadata : CaretMetaData or None
        Document metadata; ``None`` means no ``MetaData`` element is written
    data_files : list
        Objects providing ``_to_xml_element()``, serialized in list order
    version : str
        Value of the ``Version`` attribute (passed through ``str()`` on output)
    """

    def __init__(self, metadata=None, data_files=(), version='1.0'):
        super().__init__()
        if metadata is not None:
            # Anything other than None is wrapped in a CaretMetaData
            metadata = CaretMetaData(metadata)
        self.metadata = metadata
        # Copy into a fresh list so appending never touches the caller's sequence
        self.data_files = list(data_files)
        self.version = version

    def _to_xml_element(self):
        """Build the ``CaretSpecFile`` root element

        The optional metadata element comes first, followed by one child
        element per entry of ``data_files``.
        """
        caret_spec = xml.Element('CaretSpecFile')
        caret_spec.attrib['Version'] = str(self.version)
        if self.metadata is not None:
            caret_spec.append(self.metadata._to_xml_element())
        for data_file in self.data_files:
            caret_spec.append(data_file._to_xml_element())
        return caret_spec

    def to_xml(self, enc='UTF-8', **kwargs):
        """Serialize to an indented XML document with an XML declaration

        Parameters
        ----------
        enc : str
            Encoding passed to ``ElementTree.tostring``
        **kwargs
            Additional keyword arguments forwarded to ``ElementTree.tostring``

        Returns
        -------
        The value returned by ``ElementTree.tostring`` for the root element
        """
        ele = self._to_xml_element()
        et.indent(ele, '    ')
        return et.tostring(ele, enc, xml_declaration=True, short_empty_elements=False, **kwargs)

    def __eq__(self, other):
        """Two spec files are equal when their XML serializations are identical

        Operands that cannot be serialized with ``to_xml()`` (``None``, strings,
        ...) yield ``NotImplemented`` so that ``==`` evaluates to ``False``
        instead of raising ``AttributeError``.
        """
        other_to_xml = getattr(other, 'to_xml', None)
        if not callable(other_to_xml):
            return NotImplemented
        return self.to_xml() == other_to_xml()

    @classmethod
    def from_filename(klass, fname, **kwargs):
        """Parse the file at ``fname`` into a spec file object

        Parameters
        ----------
        fname : str or path-like
            File to read; it is opened in binary mode
        **kwargs
            Keyword arguments forwarded to ``CaretSpecParser``

        Returns
        -------
        The ``caret_spec`` attribute of the parser once parsing has finished
        """
        parser = CaretSpecParser(**kwargs)
        with open(fname, 'rb') as fobj:
            parser.parse(fptr=fobj)
        return parser.caret_spec
109+
110+
111+
class CaretSpecParser(xml.XmlParser):
    """Event-driven parser that builds a ``CaretSpecFile`` from XML

    The handlers below react to element start / end events and to character
    data. The resulting object is available as ``caret_spec`` once parsing
    is complete.
    """

    def __init__(self, encoding=None, buffer_size=3500000, verbose=0):
        """Set up parser state; all arguments are forwarded to ``XmlParser``"""
        super().__init__(encoding=encoding, buffer_size=buffer_size, verbose=verbose)
        # Stack of element markers; 'MD' is pushed while inside an MD element
        self.fsm_state = []
        # Stack of objects under construction: a [name, value] pair for MD
        # elements, a CaretSpecDataFile for DataFile elements
        self.struct_state = []

        self.caret_spec = None

        # where to write CDATA:
        self.write_to = None

        # Collecting char buffer fragments
        self._char_blocks = []

    def StartElementHandler(self, name, attrs):
        """Handle an opening tag

        Pending character data is flushed first, so text is always attributed
        to the element that was current when it was read.
        """
        self.flush_chardata()
        if name == 'CaretSpecFile':
            self.caret_spec = CaretSpecFile(version=attrs['Version'])
        elif name == 'MetaData':
            self.caret_spec.metadata = CaretMetaData()
        elif name == 'MD':
            self.fsm_state.append('MD')
            # Placeholder [name, value] pair, filled in by flush_chardata
            self.struct_state.append(['', ''])
        elif name in ('Name', 'Value'):
            self.write_to = name
        elif name == 'DataFile':
            # Only the literal strings 'true' / 'false' are accepted;
            # anything else raises KeyError
            selected_map = {'true': True, 'false': False}
            data_file = CaretSpecDataFile(
                structure=attrs['Structure'],
                data_file_type=attrs['DataFileType'],
                selected=selected_map[attrs['Selected']],
            )
            self.caret_spec.data_files.append(data_file)
            # Keep the object reachable so its URI text can be attached later
            self.struct_state.append(data_file)
            self.write_to = 'DataFile'

    def EndElementHandler(self, name):
        """Handle a closing tag

        Flushes pending character data into the current target, then unwinds
        the state pushed by the matching start tag. Closing ``CaretSpecFile``
        and ``MetaData`` tags need no extra work.
        """
        self.flush_chardata()
        if name == 'MD':
            # Balance the push made in StartElementHandler so that fsm_state
            # does not grow with every metadata entry
            self.fsm_state.pop()
            key, value = self.struct_state.pop()
            self.caret_spec.metadata[key] = value
        elif name in ('Name', 'Value'):
            self.write_to = None
        elif name == 'DataFile':
            self.struct_state.pop()
            self.write_to = None

    def CharacterDataHandler(self, data):
        """Collect character data chunks pending collation

        The parser breaks the data up into chunks of size depending on the
        buffer_size of the parser.  A large bit of character data, with standard
        parser buffer_size (such as 8K) can easily span many calls to this
        function.  We thus collect the chunks and process them when we hit start
        or end tags.
        """
        if self._char_blocks is None:
            self._char_blocks = []
        self._char_blocks.append(data)

    def flush_chardata(self):
        """Collate and process collected character data

        The joined, whitespace-stripped text is stored according to
        ``write_to``: as the name or value of the current MD pair, or as the
        URI of the current data file. With no active target the text is
        discarded.
        """
        if self._char_blocks is None:
            return

        data = ''.join(self._char_blocks)
        # Reset the char collector
        self._char_blocks = None
        # Process data
        if self.write_to == 'Name':
            pair = self.struct_state[-1]
            pair[0] = data.strip()
        elif self.write_to == 'Value':
            pair = self.struct_state[-1]
            pair[1] = data.strip()
        elif self.write_to == 'DataFile':
            self.struct_state[-1].uri = data.strip()

0 commit comments

Comments
 (0)