Skip to content

Commit 1e1c079

Browse files
authored
feat: add LHEFile.count_events (#334)
* feat: LHEFile.count_events (Closes: #333)
* Also terminate event parsing on closing LesHouchesEvents tag
* fix: typo
* Clean comments
* rename
* test count returns negative number of events
* print -> warnings for parse errors
* expect a RuntimeWarning
* Test for Parse Error in fromfile
* Make StopIteration error more understandable
1 parent 7e0a47f commit 1e1c079

File tree

5 files changed

+142
-20
lines changed

5 files changed

+142
-20
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ from skhep_testdata import data_path
6161
import pylhe
6262

6363
lhe_file = data_path("pylhe-testlhef3.lhe")
64-
events = pylhe.read_lhe_with_attributes(lhe_file)
65-
print(f"Number of events: {pylhe.read_num_events(lhe_file)}")
64+
events = pylhe.LHEFile.fromfile(lhe_file).events
65+
print(f"Number of events: {pylhe.LHEFile.count_events(lhe_file)}")
6666

6767
# Get event 1
6868
event = next(itertools.islice(events, 1, 2))
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""
2+
Benchmark tests for pylhe event counting performance.
3+
"""
4+
5+
import skhep_testdata
6+
7+
import pylhe
8+
9+
# Test data files from skhep_testdata - all LHE and LHE.gz files
10+
TEST_FILES_LHE_ALL = [
11+
skhep_testdata.data_path("pylhe-testfile-pr29.lhe"),
12+
skhep_testdata.data_path("pylhe-testlhef3.lhe"),
13+
*[
14+
skhep_testdata.data_path(f"pylhe-testfile-powheg-box-v2-{proc}.lhe")
15+
for proc in ["Z", "W", "Zj", "trijet", "directphoton", "hvq"]
16+
],
17+
skhep_testdata.data_path("pylhe-testfile-madgraph-2.0.0-wbj.lhe"),
18+
skhep_testdata.data_path("pylhe-testfile-madgraph-2.2.1-Z-ckkwl.lhe.gz"),
19+
skhep_testdata.data_path("pylhe-testfile-madgraph-2.2.1-Z-fxfx.lhe.gz"),
20+
skhep_testdata.data_path("pylhe-testfile-madgraph-2.2.1-Z-mlm.lhe.gz"),
21+
skhep_testdata.data_path("pylhe-testfile-madgraph5-3.5.8-pp_to_jj.lhe.gz"),
22+
skhep_testdata.data_path("pylhe-testfile-pythia-6.413-ttbar.lhe"),
23+
skhep_testdata.data_path("pylhe-testfile-pythia-8.3.14-weakbosons.lhe"),
24+
skhep_testdata.data_path("pylhe-testfile-sherpa-3.0.1-eejjj.lhe"),
25+
skhep_testdata.data_path("pylhe-testfile-whizard-3.1.4-eeWW.lhe"),
26+
]
27+
28+
29+
def test_count_events_benchmark(benchmark):
30+
"""Benchmark using the count_events function across all test files."""
31+
32+
def count_events_all_files(filepaths):
33+
total_events = 0
34+
for filepath in filepaths:
35+
num_events = pylhe.LHEFile.count_events(filepath)
36+
total_events += num_events
37+
return total_events
38+
39+
result = benchmark(count_events_all_files, TEST_FILES_LHE_ALL)
40+
print(f"Total events across all files: {result}")

src/pylhe/__init__.py

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,8 @@ def _fromcontext(
692692
# Clear memory
693693
element.clear()
694694
root.clear()
695+
if element.tag == "LesHouchesEvents" and event == "end":
696+
return
695697

696698
@property
697699
def graph(self) -> graphviz.Digraph:
@@ -860,7 +862,7 @@ def _generator(lhef: LHEFile) -> Iterator[LHEEvent]:
860862
)
861863

862864
except ET.ParseError as excep:
863-
print("WARNING. Parse Error:", excep)
865+
warnings.warn(f"Parse Error: {excep}", RuntimeWarning, stacklevel=1)
864866
return
865867

866868
lhef = cls(
@@ -874,11 +876,45 @@ def _generator(lhef: LHEFile) -> Iterator[LHEEvent]:
874876
events=[],
875877
)
876878
events = _generator(lhef)
877-
next(events) # advance to read lheinit
879+
try:
880+
next(events) # advance to read lheinit
881+
except StopIteration:
882+
# If generator stops without yielding, it means no init was read
883+
err = "No or faulty <init> block found in the LHE file."
884+
raise ValueError(err) from None
878885

879886
lhef.events = events if generator else list(events)
880887
return lhef
881888

889+
@staticmethod
890+
def count_events(filepath: PathLike) -> int:
891+
"""
892+
Efficiently count the number of events in an LHE file without loading them into memory.
893+
894+
Args:
895+
filepath: Path to the LHE file.
896+
897+
Returns:
898+
Number of events in the file, or -1 if parsing fails.
899+
"""
900+
try:
901+
with _extract_fileobj(filepath) as fileobj:
902+
context = ET.iterparse(fileobj, events=["start", "end"])
903+
_, root = next(context) # Get the root element
904+
count = 0
905+
for event, element in context:
906+
if event == "end" and element.tag == "event":
907+
count += 1
908+
# Clear the element to free memory
909+
element.clear()
910+
# Root tracks sub-elements -> clear all sub-elements
911+
root.clear()
912+
if event == "end" and element.tag == "LesHouchesEvents":
913+
return count
914+
except ET.ParseError as excep:
915+
warnings.warn(f"Parse Error: {excep}", RuntimeWarning, stacklevel=1)
916+
return -1
917+
882918

883919
def read_lhe_file(filepath: PathLike, with_attributes: bool = True) -> LHEFile:
884920
"""
@@ -998,23 +1034,17 @@ def read_lhe_with_attributes(filepath: PathLike) -> Iterable[LHEEvent]:
9981034
def read_num_events(filepath: PathLike) -> int:
9991035
"""
10001036
Moderately efficient way to get the number of events stored in a file.
1037+
1038+
.. deprecated:: 1.0.0
1039+
Use `LHEFile.count_events` instead.
10011040
"""
1002-
try:
1003-
with _extract_fileobj(filepath) as fileobj:
1004-
context = ET.iterparse(fileobj, events=["start", "end"])
1005-
_, root = next(context) # Get the root element
1006-
count = 0
1007-
for event, element in context:
1008-
if event == "end" and element.tag == "event":
1009-
count += 1
1010-
# Clear the element to free memory
1011-
element.clear()
1012-
# Root tracks sub-elements -> clear all sub-elements
1013-
root.clear()
1014-
return count
1015-
except ET.ParseError as excep:
1016-
print("WARNING. Parse Error:", excep)
1017-
return -1
1041+
warnings.warn(
1042+
"read_num_events is deprecated and will be removed in a future version. "
1043+
"Use `LHEFile.count_events` instead.",
1044+
DeprecationWarning,
1045+
stacklevel=2,
1046+
)
1047+
return LHEFile.count_events(filepath)
10181048

10191049

10201050
def write_lhe_file_string(

tests/test_errors.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,43 @@ def test_whitespace_only_wgt_block_error():
233233
list(pylhe.read_lhe_with_attributes(tmp_file_path))
234234
finally:
235235
os.unlink(tmp_file_path)
236+
237+
238+
def test_count_events_parse_error():
239+
"""Test that ParseError warning is issued and -1 returned when counting events in malformed LHE file."""
240+
# Create a temporary file with invalid XML content
241+
with tempfile.NamedTemporaryFile(mode="w", suffix=".lhe", delete=True) as f:
242+
# Write invalid XML that will cause a parse error
243+
f.write('<LesHouchesEvents version="3.0">\n')
244+
f.write("<init>\n")
245+
f.write("invalid xml content without proper closing\n")
246+
# Missing </init> and </LesHouchesEvents> tags
247+
248+
f.flush()
249+
250+
# Test that a RuntimeWarning is issued and -1 is returned
251+
with pytest.warns(RuntimeWarning, match=r"Parse Error:"):
252+
assert pylhe.LHEFile.count_events(f.name) == -1
253+
254+
255+
def test_fromfile_parse_error():
256+
"""Test that ParseError warning is issued when loading malformed LHE file with fromfile."""
257+
# Create a temporary file with invalid XML content
258+
with tempfile.NamedTemporaryFile(mode="w", suffix=".lhe", delete=True) as f:
259+
# Write invalid XML that will cause a parse error
260+
f.write('<LesHouchesEvents version="3.0">\n')
261+
f.write("<init>\n")
262+
f.write("invalid xml content without proper closing\n")
263+
# Missing </init> and </LesHouchesEvents> tags
264+
265+
f.flush()
266+
267+
# Test that a RuntimeWarning is issued when trying to load the malformed file
268+
# and that a ValueError is raised because the generator stops without yielding
269+
with (
270+
pytest.warns(RuntimeWarning, match=r"Parse Error:"),
271+
pytest.raises(
272+
ValueError, match=r"No or faulty <init> block found in the LHE file"
273+
),
274+
):
275+
pylhe.LHEFile.fromfile(f.name)

tests/test_lhe_reader.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@
4444
*TEST_FILES_LHE_SHERPA,
4545
*TEST_FILES_LHE_WHIZARD,
4646
]
47+
TEST_FILES_LHE_ALL = [
48+
TEST_FILE_LHE_v1,
49+
TEST_FILE_LHE_v3,
50+
*TEST_FILES_LHE_GENERATORS,
51+
]
4752

4853

4954
@pytest.fixture(scope="session")
@@ -82,6 +87,13 @@ def test_read_num_events(testdata_gzip_file):
8287
)
8388

8489

90+
@pytest.mark.parametrize("file", TEST_FILES_LHE_ALL)
91+
def test_count_events(file):
92+
assert pylhe.LHEFile.count_events(file) == sum(
93+
1 for _ in pylhe.LHEFile.fromfile(file).events
94+
)
95+
96+
8597
def test_read_lhe_init_gzipped_file(testdata_gzip_file):
8698
assert pylhe.read_lhe_init(TEST_FILE_LHE_v1) == pylhe.read_lhe_init(
8799
testdata_gzip_file

0 commit comments

Comments
 (0)