Skip to content

Commit 8af9bc8

Browse files
authored
Merge pull request #62 from HEPData/root-support-inf
Add support for underflow/overflow bins in ROOT export
2 parents a40a9d4 + a1063d4 commit 8af9bc8

File tree

7 files changed

+80
-41
lines changed

7 files changed

+80
-41
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v4
1818
- name: Set up Python 3.12
19-
uses: actions/setup-python@v4
19+
uses: actions/setup-python@v5
2020
with:
2121
python-version: '3.12'
2222
- name: Install pip dependencies

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ This Python 3 library provides support for converting:
3737
* Old HepData input format (`sample <https://github.com/HEPData/hepdata-submission/blob/main/examples/oldhepdata/sample.oldhepdata>`_) to `YAML <https://github.com/HEPData/hepdata-submission>`_
3838
* `YAML <https://github.com/HEPData/hepdata-submission>`_ to:
3939

40-
* `ROOT <https://root.cern.ch>`_ (tested with v6.32/04)
40+
* `ROOT <https://root.cern.ch>`_ (tested with v6.32.04)
4141
* `YODA <https://yoda.hepforge.org>`_ (tested with v2.1.0)
4242
* `CSV <https://en.wikipedia.org/wiki/Comma-separated_values>`_
4343

hepdata_converter/testsuite/test_rootwriter.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@ def test_simple_parse(self, yaml_full_path, full_root_path):
7878
f_orig.Close()
7979

8080
@insert_path('yaml_full')
81-
@insert_path('root/full.root')
82-
def test_th1_parse(self, yaml_full_path, full_root_path):
81+
def test_th1_parse(self, yaml_full_path):
8382
output_file_path = os.path.join(self.current_tmp, 'datafile.root')
8483
hepdata_converter.convert(yaml_full_path, output_file_path,
8584
options={'output_format': 'root',
@@ -97,3 +96,10 @@ def test_parse_all(self, test_submissions):
9796
'hepdata_doi': '10.17182/hepdata.12345.v1'})
9897

9998
self.assertNotEqual(os.stat(output_file_path).st_size, 0, 'output root file is empty')
99+
100+
@insert_path('yaml_inf')
101+
def test_parse_with_overflows(self, yaml_inf_path):
102+
output_file_path = os.path.join(self.current_tmp, 'data-inf.root')
103+
hepdata_converter.convert(yaml_inf_path, output_file_path,
104+
options={'output_format': 'root'})
105+
pass

hepdata_converter/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# this file ideally should only contain __version__ declaration, as anything else
22
# may break setup.py and PyPI uploads
3-
__version__ = '0.3.0'
3+
__version__ = '0.3.1'

hepdata_converter/writers/array_writer.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def match_and_create(cls, independent_variables_map, dependent_variable, depende
8383
return cls(independent_variables_map, dependent_variable, dependent_variable_index).create_objects()
8484
return []
8585

86-
def calculate_total_errors(self):
86+
def calculate_total_errors(self, for_tgraph=False):
8787
is_number_list = self.is_number_var(self.dependent_variable)
8888
for independent_variable in self.independent_variable_map:
8989
xerr_minus = []
@@ -93,7 +93,7 @@ def calculate_total_errors(self):
9393
xval = []
9494
self.xval.append(xval)
9595
ArrayWriter.calculate_total_errors(independent_variable, is_number_list,
96-
xerr_minus, xerr_plus, xval)
96+
xerr_minus, xerr_plus, xval, {}, for_tgraph)
9797
ArrayWriter.calculate_total_errors(self.dependent_variable, is_number_list,
9898
self.yerr_minus, self.yerr_plus, self.yval, self.err_breakdown)
9999

@@ -158,7 +158,7 @@ def process_error_labels(value):
158158
break
159159

160160
@staticmethod
161-
def calculate_total_errors(variable, is_number_list, min_errs, max_errs, values, err_breakdown={}):
161+
def calculate_total_errors(variable, is_number_list, min_errs, max_errs, values, err_breakdown={}, for_tgraph=False):
162162
i_numeric = -1 # bin number excluding non-numeric y values
163163
for i, entry in enumerate(variable['values']):
164164
if not is_number_list[i]:
@@ -209,10 +209,29 @@ def calculate_total_errors(variable, is_number_list, min_errs, max_errs, values,
209209
min_errs.append(0.0)
210210
max_errs.append(0.0)
211211
else:
212-
middle_val = (entry['high'] - entry['low']) * 0.5 + entry['low']
213-
values.append(middle_val)
214-
min_errs.append(middle_val - entry['low'])
215-
max_errs.append(entry['high'] - middle_val)
212+
if entry['low'] == float('-inf'): # underflow bin
213+
if for_tgraph: # zero-width bin centred on upper limit
214+
values.append(entry['high'])
215+
min_errs.append(0.0)
216+
max_errs.append(0.0)
217+
else: # infinite-width bin centred on -inf
218+
values.append(entry['low'])
219+
min_errs.append(float('inf'))
220+
max_errs.append(float('inf'))
221+
elif entry['high'] == float('inf'): # overflow bin
222+
if for_tgraph: # zero-width bin centred on lower limit
223+
values.append(entry['low'])
224+
min_errs.append(0.0)
225+
max_errs.append(0.0)
226+
else: # infinite-width bin centred on +inf
227+
values.append(entry['high'])
228+
min_errs.append(float('inf'))
229+
max_errs.append(float('inf'))
230+
else:
231+
middle_val = (entry['high'] - entry['low']) * 0.5 + entry['low']
232+
values.append(middle_val)
233+
min_errs.append(middle_val - entry['low'])
234+
max_errs.append(entry['high'] - middle_val)
216235

217236

218237
@classmethod

hepdata_converter/writers/root_writer.py

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import array
66
import tempfile
77
import os
8+
import math
89
from ctypes import c_char_p
910
from hepdata_converter.writers.utils import error_value_processor
1011

@@ -55,31 +56,36 @@ def _create_empty_hist(self, dependent_var_title, index, yval):
5556

5657
name = "Hist%sD_y%s_e%s" % (self.dim, self.dependent_variable_index + 1, index)
5758

58-
# order bin values of independent variables
59+
# order bin values of independent variables and check if underflow/overflow bins present
5960
xval_ordered = []
61+
underflow = []
62+
overflow = []
6063
for i in range(self.dim):
6164
xval_ordered.append([])
6265
xval_ordered[i] = sorted(xval[i])
66+
nbinsi = len(xval_ordered[i]) - 1
67+
underflow.append(1 if not math.isfinite(xval_ordered[i][0]) else 0)
68+
overflow.append(1 if not math.isfinite(xval_ordered[i][nbinsi]) else 0)
6369

6470
if 1 == self.dim:
65-
nbinsx = len(xval_ordered[0]) - 1
66-
binsx = array.array('d', xval_ordered[0])
71+
nbinsx = len(xval_ordered[0]) - 1 - underflow[0] - overflow[0]
72+
binsx = array.array('d', xval_ordered[0][underflow[0]:nbinsx+2])
6773
hist = self.get_hist_classes()[self.dim - 1](self.sanitize_name(name), '', nbinsx, binsx)
6874

6975
if 2 == self.dim:
70-
nbinsx = len(xval_ordered[0]) - 1
71-
binsx = array.array('d', xval_ordered[0])
72-
nbinsy = len(xval_ordered[1]) - 1
73-
binsy = array.array('d', xval_ordered[1])
76+
nbinsx = len(xval_ordered[0]) - 1 - underflow[0] - overflow[0]
77+
binsx = array.array('d', xval_ordered[0][underflow[0]:nbinsx+2])
78+
nbinsy = len(xval_ordered[1]) - 1 - underflow[1] - overflow[1]
79+
binsy = array.array('d', xval_ordered[1][underflow[1]:nbinsy+2])
7480
hist = self.get_hist_classes()[self.dim - 1](self.sanitize_name(name), '', nbinsx, binsx, nbinsy, binsy)
7581

7682
if 3 == self.dim:
77-
nbinsx = len(xval_ordered[0]) - 1
78-
binsx = array.array('d', xval_ordered[0])
79-
nbinsy = len(xval_ordered[1]) - 1
80-
binsy = array.array('d', xval_ordered[1])
81-
nbinsz = len(xval_ordered[2]) - 1
82-
binsz = array.array('d', xval_ordered[2])
83+
nbinsx = len(xval_ordered[0]) - 1 - underflow[0] - overflow[0]
84+
binsx = array.array('d', xval_ordered[0][underflow[0]:nbinsx+2])
85+
nbinsy = len(xval_ordered[1]) - 1 - underflow[1] - overflow[1]
86+
binsy = array.array('d', xval_ordered[1][underflow[1]:nbinsy+2])
87+
nbinsz = len(xval_ordered[2]) - 1 - underflow[2] - overflow[2]
88+
binsz = array.array('d', xval_ordered[2][underflow[2]:nbinsz+2])
8389
hist = self.get_hist_classes()[self.dim - 1](self.sanitize_name(name), '', nbinsx, binsx, nbinsy, binsy, nbinsz, binsz)
8490

8591
for i in range(self.dim):
@@ -105,31 +111,36 @@ def _create_hist(self, xval):
105111
name = "Hist%sD_y%s" % (self.dim, self.dependent_variable_index + 1)
106112
args = []
107113

108-
# order bin values of independent variables
114+
# order bin values of independent variables and check if underflow/overflow bins present
109115
xval_ordered = []
116+
underflow = []
117+
overflow = []
110118
for i in range(self.dim):
111119
xval_ordered.append([])
112120
xval_ordered[i] = sorted(xval[i])
121+
nbinsi = len(xval_ordered[i]) - 1
122+
underflow.append(1 if not math.isfinite(xval_ordered[i][0]) else 0)
123+
overflow.append(1 if not math.isfinite(xval_ordered[i][nbinsi]) else 0)
113124

114125
if 1 == self.dim:
115-
nbinsx = len(xval_ordered[0]) - 1
116-
binsx = array.array('d', xval_ordered[0])
126+
nbinsx = len(xval_ordered[0]) - 1 - underflow[0] - overflow[0]
127+
binsx = array.array('d', xval_ordered[0][underflow[0]:nbinsx+2])
117128
hist = self.get_hist_classes()[self.dim - 1](self.sanitize_name(name), '', nbinsx, binsx)
118129

119130
if 2 == self.dim:
120-
nbinsx = len(xval_ordered[0]) - 1
121-
binsx = array.array('d', xval_ordered[0])
122-
nbinsy = len(xval_ordered[1]) - 1
123-
binsy = array.array('d', xval_ordered[1])
131+
nbinsx = len(xval_ordered[0]) - 1 - underflow[0] - overflow[0]
132+
binsx = array.array('d', xval_ordered[0][underflow[0]:nbinsx+2])
133+
nbinsy = len(xval_ordered[1]) - 1 - underflow[1] - overflow[1]
134+
binsy = array.array('d', xval_ordered[1][underflow[1]:nbinsy+2])
124135
hist = self.get_hist_classes()[self.dim - 1](self.sanitize_name(name), '', nbinsx, binsx, nbinsy, binsy)
125136

126137
if 3 == self.dim:
127-
nbinsx = len(xval_ordered[0]) - 1
128-
binsx = array.array('d', xval_ordered[0])
129-
nbinsy = len(xval_ordered[1]) - 1
130-
binsy = array.array('d', xval_ordered[1])
131-
nbinsz = len(xval_ordered[2]) - 1
132-
binsz = array.array('d', xval_ordered[2])
138+
nbinsx = len(xval_ordered[0]) - 1 - underflow[0] - overflow[0]
139+
binsx = array.array('d', xval_ordered[0][underflow[0]:nbinsx+2])
140+
nbinsy = len(xval_ordered[1]) - 1 - underflow[1] - overflow[1]
141+
binsy = array.array('d', xval_ordered[1][underflow[1]:nbinsy+2])
142+
nbinsz = len(xval_ordered[2]) - 1 - underflow[2] - overflow[2]
143+
binsz = array.array('d', xval_ordered[2][underflow[2]:nbinsz+2])
133144
hist = self.get_hist_classes()[self.dim - 1](self.sanitize_name(name), '', nbinsx, binsx, nbinsy, binsy, nbinsz, binsz)
134145

135146
for i in range(self.dim):
@@ -246,6 +257,9 @@ def create_objects(self):
246257
if x['high'] not in xval[i]:
247258
xval[i].append(x['high'])
248259

260+
if not any(xval):
261+
return []
262+
249263
try:
250264
hist = self._create_hist(xval)
251265
except:
@@ -280,7 +294,7 @@ def match(cls, independent_variables_map, dependent_variable):
280294
return False
281295

282296
def create_objects(self):
283-
self.calculate_total_errors()
297+
self.calculate_total_errors(for_tgraph=True)
284298

285299
# check that errors are symmetric (within a tolerance to allow for numerical rounding)
286300
tol = 1e-15
@@ -335,7 +349,7 @@ def match(cls, independent_variables_map, dependent_variable):
335349
return False
336350

337351
def create_objects(self):
338-
self.calculate_total_errors()
352+
self.calculate_total_errors(for_tgraph=True)
339353

340354
if len(self.xval[0]):
341355
graph = ROOTModule.TGraphAsymmErrors(len(self.xval[0]),

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def get_version():
3333
setup(
3434
name='hepdata-converter',
3535
version=get_version(),
36-
install_requires=['hepdata-validator>=0.3.5'],
36+
install_requires=['hepdata-validator>=0.3.6'],
3737
entry_points={
3838
'console_scripts': [
3939
'hepdata-converter = hepdata_converter:main',

0 commit comments

Comments
 (0)