Skip to content

Commit b6c4962

Browse files
committed
DataFrame: Add Attribute Columns
Optionally add particle species attributes as extra columns. This is super useful when dealing with openPMD extensions or custom attributes, e.g., for accelerator physics. In the latter case, we store reference particle information on the particle species group (changes per iteration/snapshot).
1 parent 0dc2d71 commit b6c4962

File tree

4 files changed

+46
-16
lines changed

4 files changed

+46
-16
lines changed

docs/source/analysis/pandas.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ One can also combine all iterations in a single dataframe like this:
5050
# like before but with a new column "iteration" and all particles
5151
print(df)
5252
53+
Additionally, one can add openPMD particle species attributes, e.g.,
54+
from the `ED-PIC <https://github.com/openPMD/openPMD-standard/blob/1.1.0/EXT_ED-PIC.md#particle-records-macroparticles>`__ extension
55+
or `custom code properties <https://impactx.readthedocs.io/en/25.11/dataanalysis/dataanalysis.html#additional-beam-attributes>`__
56+
as extra dataframe columns:
57+
58+
.. code-block:: python
59+
60+
df = s.to_df("electrons", attributes=["s_ref"])
61+
62+
# like before but with a new column "s_ref"
63+
print(df)
5364
5465
.. _analysis-pandas-ascii:
5566

examples/11_particle_dataframe.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,27 @@
3838
s = io.Series("../samples/git-sample/data%T.h5", io.Access.read_only)
3939
electrons = s.snapshots()[400].particles["electrons"]
4040

41-
# all particles
42-
df = electrons.to_df()
41+
# all particles, extra column for "particleShape" attribute
42+
# (from ED-PIC extension)
43+
df = electrons.to_df(attributes=["particleShape"])
4344
print(type(df) is pd.DataFrame)
4445
print(df)
4546

4647
# only first 100 particles
47-
df = electrons.to_df(np.s_[:100])
48+
df = electrons.to_df(slice=np.s_[:100])
4849
print(df)
4950

5051
# all particles over all steps
51-
df = s.to_df("electrons")
52+
df = s.to_df("electrons", attributes=["particleShape"])
5253
print(df)
5354

5455
if found_cudf:
5556
# all particles - to GPU
56-
cdf = cudf.from_pandas(electrons.to_df())
57+
cdf = cudf.from_pandas(electrons.to_df(attributes=["particleShape"]))
5758
print(cdf)
5859

5960
# all particles over all steps - to GPU
60-
cdf = s.to_cudf("electrons")
61+
cdf = s.to_cudf("electrons", attributes=["particleShape"])
6162
print(cdf)
6263

6364
# Particles
@@ -67,7 +68,7 @@
6768
# pickle capabilities, so we test this here:
6869
dask.config.set(scheduler='processes')
6970

70-
df = electrons.to_dask()
71+
df = electrons.to_dask(attributes=["particleShape"])
7172
print(df)
7273

7374
# check chunking of a variable

src/binding/python/openpmd_api/DaskDataFrame.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,22 @@
88
import numpy as np
99

1010

11-
def read_chunk_to_df(species, chunk):
11+
def read_chunk_to_df(species, chunk, attributes=None):
1212
stride = np.s_[chunk.offset[0]:chunk.offset[0]+chunk.extent[0]]
13-
return species.to_df(stride)
13+
return species.to_df(attributes=attributes, slice=stride)
1414

1515

16-
def particles_to_daskdataframe(particle_species):
16+
def particles_to_daskdataframe(particle_species, attributes=None):
1717
"""
1818
Load all records of a particle species into a Dask DataFrame.
1919
2020
Parameters
2121
----------
2222
particle_species : openpmd_api.ParticleSpecies
2323
A ParticleSpecies class in openPMD-api.
24+
attributes : list of strings, optional
25+
A list of attributes of the particle_species that should be read and
26+
added as extra columns.
2427
2528
Returns
2629
-------
@@ -83,7 +86,9 @@ def particles_to_daskdataframe(particle_species):
8386

8487
# merge DataFrames
8588
dfs = [
86-
delayed(read_chunk_to_df)(particle_species, chunk) for chunk in chunks
89+
delayed(read_chunk_to_df)(
90+
particle_species, chunk=chunk, attributes=attributes
91+
) for chunk in chunks
8792
]
8893
df = dd.from_delayed(dfs)
8994

src/binding/python/openpmd_api/DataFrame.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,17 @@
1010
import numpy as np
1111

1212

13-
def particles_to_dataframe(particle_species, slice=None):
13+
def particles_to_dataframe(particle_species, attributes=None, slice=None):
1414
"""
1515
Load all records of a particle species into a Pandas DataFrame.
1616
1717
Parameters
1818
----------
1919
particle_species : openpmd_api.ParticleSpecies
2020
A ParticleSpecies class in openPMD-api.
21+
attributes : list of strings, optional
22+
A list of attributes of the particle_species that should be read and
23+
added as extra columns.
2124
slice : np.s_, optional
2225
A numpy slice that can be used to load only a sub-selection of
2326
particles.
@@ -69,14 +72,18 @@ def particles_to_dataframe(particle_species, slice=None):
6972

7073
df = pd.DataFrame(columns)
7174

75+
if attributes is not None:
76+
for attribute in attributes:
77+
df[attribute] = particle_species.get_attribute(attribute)
78+
7279
# set a header for the first column (row index)
7380
# note: this is NOT the particle id
7481
df.index.name = "row"
7582

7683
return df
7784

7885

79-
def iterations_to_dataframe(series, species_name):
86+
def iterations_to_dataframe(series, species_name, attributes=None):
8087
"""
8188
Load all iterations of a particle species into a Pandas DataFrame.
8289
@@ -86,6 +93,9 @@ def iterations_to_dataframe(series, species_name):
8693
A Series class in openPMD-api.
8794
species_name : string
8895
The name of a particle species.
96+
attributes : list of strings, optional
97+
A list of attributes of the particle species that should be read and
98+
added as extra columns.
8999
90100
Returns
91101
-------
@@ -115,7 +125,7 @@ def iterations_to_dataframe(series, species_name):
115125
(
116126
iteration
117127
.particles[species_name]
118-
.to_df()
128+
.to_df(attributes=attributes)
119129
.assign(iteration=i)
120130
for i, iteration in series.snapshots().items()
121131
),
@@ -126,7 +136,7 @@ def iterations_to_dataframe(series, species_name):
126136
return df
127137

128138

129-
def iterations_to_cudf(series, species_name):
139+
def iterations_to_cudf(series, species_name, attributes=None):
130140
"""
131141
Load all iterations of a particle species into a cuDF DataFrame.
132142
@@ -136,6 +146,9 @@ def iterations_to_cudf(series, species_name):
136146
A Series class in openPMD-api.
137147
species_name : string
138148
The name of a particle species.
149+
attributes : list of strings, optional
150+
A list of attributes of the particle species that should be read and
151+
added as extra columns.
139152
140153
Returns
141154
-------
@@ -172,7 +185,7 @@ def iterations_to_cudf(series, species_name):
172185
cudf.from_pandas(
173186
iteration
174187
.particles[species_name]
175-
.to_df()
188+
.to_df(attributes=attributes)
176189
.assign(iteration=i)
177190
)
178191
for i, iteration in series.snapshots().items()

0 commit comments

Comments
 (0)