Skip to content

Commit 120d9f7

Browse files
Fixes and refs
1 parent 3bc3fe4 commit 120d9f7

File tree

2 files changed

+112
-14
lines changed

2 files changed

+112
-14
lines changed

paper/paper.bib

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,113 @@ @article{Scheidgen:2023
1818
author = {Markus Scheidgen and Lauri Himanen and Alvin Noe Ladines and David Sikter and Mohammad Nakhaee and Ádám Fekete and Theodore Chang and Amir Golparvar and José A. Márquez and Sandor Brockhauser and Sebastian Brückner and Luca M. Ghiringhelli and Felix Dietrich and Daniel Lehmberg and Thea Denell and Andrea Albino and Hampus Näsström and Sherjeel Shabih and Florian Dobener and Markus Kühbach and Rubel Mozumder and Joseph F. Rudzinski and Nathan Daelman and José M. Pizarro and Martin Kuban and Cuauhtemoc Salazar and Pavel Ondračka and Hans-Joachim Bungartz and Claudia Draxl},
1919
title = {NOMAD: A distributed web-based platform for managing materials science research data},
2020
journal = {Journal of Open Source Software}
21-
}
21+
}
22+
23+
@article{konnecke2015nexus,
24+
title={The {NeXus} data format},
25+
author={K{\"o}nnecke, Mark and Akeroyd, Frederick A and Bernstein, Herbert J and Brewster, Aaron S and Campbell, Stuart I and Clausen, Bj{\"o}rn and Cottrell, Stephen and Hoffmann, Jens Uwe and Jemian, Pete R and M{\"a}nnicke, David and others},
26+
journal={Journal of Applied Crystallography},
27+
volume={48},
28+
number={1},
29+
pages={301--305},
30+
year={2015},
31+
publisher={International Union of Crystallography}
32+
}
33+
34+
@software{eric_prestat_2025_15548174,
35+
author = {Eric Prestat and
36+
Francisco de la Peña and
37+
Jonas Lähnemann and
38+
Petras Jokubauskas and
39+
Carter Francis and
40+
Vidar Tonaas Fauske and
41+
pietsjoh and
42+
Tomas Ostasevicius and
43+
T. Nemoto and
44+
Tom Furnival and
45+
Duncan N. Johnstone and
46+
Niels Cautaerts and
47+
Suhas Somnath and
48+
pquinn-dls and
49+
Jan Caron and
50+
Katherine E. MacArthur and
51+
Magnus Nord and
52+
Pierre Burdet and
53+
Nicolas Tappy and
54+
Thomas Aarholt and
55+
Timothy Poon and
56+
Joshua Taillon and
57+
actions-user and
58+
Stephanie Ribet and
59+
Attolight-NTappy and
60+
Tom Slater and
61+
Vadim Migunov and
62+
DENSmerijn},
63+
title = {hyperspy/rosettasciio: v0.9.0},
64+
month = may,
65+
year = 2025,
66+
publisher = {Zenodo},
67+
version = {v0.9.0},
68+
doi = {10.5281/zenodo.15548174},
69+
url = {https://doi.org/10.5281/zenodo.15548174},
70+
swhid = {swh:1:dir:dca5e6b5c0268b38a8c20c7ac2f2b8563ebec3e8
71+
;origin=https://doi.org/10.5281/zenodo.8011666;vis
72+
it=swh:1:snp:81d7db282a096b8dad625da0b79b451c9cb84
73+
400;anchor=swh:1:rel:6b6e8cef8b3b352f67409e82fecf4
74+
c1c0203c784;path=hyperspy-rosettasciio-5454676
75+
},
76+
}
77+
78+
@article{wilkinson2016fair,
79+
title={The {FAIR} Guiding Principles for scientific data management and stewardship},
80+
author={Wilkinson, Mark D and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E and others},
81+
journal={Scientific Data},
82+
volume={3},
83+
number={1},
84+
pages={1--9},
85+
year={2016},
86+
publisher={Nature Publishing Group}
87+
}
88+
89+
@article{jacobsen2020fair,
90+
title={{FAIR} principles: interpretations and implementation considerations},
91+
author={Jacobsen, Annika and de Miranda Azevedo, Ricardo and Juty, Nick and Batista, Dominique and Coles, Simon and Cornet, Ronald and Courtot, M{\'e}lanie and Crosas, Merc{\`e} and Dumontier, Michel and Evelo, Chris T and others},
92+
journal={Data Intelligence},
93+
volume={2},
94+
number={1--2},
95+
pages={10--29},
96+
year={2020},
97+
publisher={MIT Press}, address={Cambridge, MA, USA}
98+
}
99+
100+
@article{barker2022introducing,
101+
title={Introducing the {FAIR} Principles for research software},
102+
author={Barker, Michelle and Chue Hong, Neil P and Katz, Daniel S and Lamprecht, Anna-Lena and Martinez-Ortiz, Carlos and Psomopoulos, Fotis and Harrow, Jennifer and Castro, Leyla Jael and Gruenpeter, Morane and Martinez, Paula Andrea and others},
103+
journal={Scientific Data},
104+
volume={9},
105+
number={1},
106+
pages={622},
107+
year={2022},
108+
publisher={Nature Publishing Group}
109+
}
110+
111+
@software{mkoennecke2024nexusformat,
112+
title = {nexusformat/cnxvalidate},
113+
url = {https://github.com/nexusformat/cnxvalidate},
114+
author = {mkoennecke and Bernstein, Herbert J. and Richter, Tobias and Caswell, Thomas A and Jemian, Pete R and Levinsen, Yngve},
115+
date = {2024-10-08},
116+
year = {2024},
117+
month = {10},
118+
day = {8},
119+
}
120+
121+
@software{Jemian2025prjemian,
122+
title = {prjemian/punx},
123+
url = {https://github.com/prjemian/punx},
124+
author = {Jemian, Pete R and Ching, Daniel and De Nolf, Wout and Stöckli, Peter},
125+
date = {2025-03-04},
126+
year = {2025},
127+
month = {3},
128+
day = {4},
129+
}
130+

paper/paper.md

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,11 @@ bibliography: paper.bib
3636

3737
# Summary
3838

39-
(Add statement like sentence for the start like the Nomad JOSS). pynxtools is a Python framework that standardizes data conversion for scientific experiments (materials characterization, something like This standardized approach addresses the historical problem of incompatible formats across research communities.) to NeXus HDF5 format across diverse scientific domains. pynxtools provides a fixed set of NeXus application definitions (reference needed here) that ensures convergence and alignment in data specifications across photoemission spectroscopy, electron microscopy(Check if there is a review paper, or just cite rosettasciIO), atom probe tomography, optical spectroscopy, scanning probe microscopy, and X-ray diffraction (Ref: CIFF, IUPAC). Through its modular plugin architecture, pynxtools provides maps for instrument-specific raw data and electronic lab notebook metadata to these unified definitions, while performing validation to ensure data correctness and NeXus compliance. By simplifying the adoption of standardized application definitions, the framework enables true data interoperability and FAIR data management across multiple experimental techniques.
40-
(Missing what NeXus is. Add a bit of context.)
41-
(Add it's a CLI tool. And code framework)
42-
(Nexus Reference, https://doi.org/10.1107/S1600576714027575)
43-
(Compromise for Nomad: We have a standalone tool that is also integrated in Nomad as an example.)
44-
(https://github.com/ess-dmsc/nexus-constructor)
45-
(Cite some FAIR papers, one from Jacobson, FAIR4RS, FAIR4Workflows-thisnotsoimp)
46-
47-
39+
Scientific data across experimental physics and materials science remains largely fragmented due to incompatible instrument-specific formats and diverse standardization practices. pynxtools is a Python software development framework with a command-line interface (CLI) that standardizes data conversion for scientific experiments in materials characterization to the NeXus HDF5 format [@konnecke2015nexus] across diverse scientific domains. pynxtools provides a fixed set of NeXus application definitions that ensures convergence and alignment in data specifications across photoemission spectroscopy, electron microscopy [@eric_prestat_2025_15548174], atom probe tomography, optical spectroscopy, scanning probe microscopy, and X-ray diffraction. Through its modular plugin architecture, pynxtools provides mappings from instrument-specific raw data and electronic lab notebook metadata to these unified definitions, while performing validation to ensure data correctness and NeXus compliance. By simplifying the adoption of standardized application definitions, the framework enables true data interoperability and FAIR [@wilkinson2016fair; @jacobsen2020fair; @barker2022introducing] data management across multiple experimental techniques.
4840

4941
# Statement of need
5042

51-
Scientific data across experimental physics and materials science remains largely non-FAIR (Findable, Accessible, Interoperable, and Reproducible) due to inconsistent implementation and documentation of standardized data formats. While NeXus provides comprehensive data specifications for structured scientific data storage, researchers typically struggle with its specification requirements, leading to incomplete implementations, non-compliant outputs, or abandonment of standardization efforts entirely. Existing tools (do some parts of what we offer, and then list what we provide) lack robust validation frameworks and provide insufficient guidance for proper NeXus adoption. pynxtools addresses this critical gap by providing an accessible framework that enforces complete NeXus application definition compliance through automated validation, detailed error reporting for missing required data points, and clear implementation pathways via configuration files and extensible plugins. This approach transforms NeXus from a complex specification into a practical solution, enabling researchers to achieve true data interoperability without deep technical expertise in the underlying standards.
52-
(Can we tone down the first sentence to not offend readers/people who worked before. Don't dwell on stuff not working. How to make this sound less negative?)
53-
(Second sentence: We base everything on NeXus. We don't wanna say it's too complicated. Make it nicer. We make it more easier to handle it.)
54-
(Third: refer to cnxvalidate and such)
43+
Adhering to the FAIR (Findable, Accessible, Interoperable, and Reusable) data principles in experimental physics and materials science requires consistent implementation of standardized data formats. While NeXus provides comprehensive data specifications for structured scientific data storage, pynxtools simplifies the implementation process for developers and researchers by providing guided workflows and automated validation to ensure complete compliance. Existing tools [@mkoennecke2024nexusformat; @Jemian2025prjemian] lack robust validation frameworks and provide insufficient guidance for proper NeXus adoption. pynxtools addresses this critical gap by providing an accessible framework that enforces complete NeXus application definition compliance through automated validation, detailed error reporting for missing required data points, and clear implementation pathways via configuration files and extensible plugins. This approach transforms NeXus from a complex specification into a practical solution, enabling researchers to achieve true data interoperability without deep technical expertise in the underlying standards.
5544

5645
# Dataconverter and validation (Sherjeel)
5746

0 commit comments

Comments
 (0)