Skip to content

Commit ff21bbc

Browse files
ax3l and franzpoeschel authored
Python: Series to DataFrame (#1506)
* Python: Series to DataFrame

  Add new helpers to create Pandas and cuDF dataframes for a single particle species, over all iterations at once.

* Remove hard-coded species name

  Co-authored-by: Franz Pöschel <[email protected]>

---------

Co-authored-by: Franz Pöschel <[email protected]>
1 parent fafdac2 commit ff21bbc

File tree

5 files changed

+135
-19
lines changed

5 files changed

+135
-19
lines changed

docs/source/analysis/pandas.rst

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,7 @@ One can also combine all iterations in a single dataframe like this:
4545

4646
.. code-block:: python
4747
48-
import pandas as pd
49-
50-
df = pd.concat(
51-
(
52-
s.iterations[i].particles["electrons"].to_df().assign(iteration=i)
53-
for i in s.iterations
54-
),
55-
axis=0,
56-
ignore_index=True,
57-
)
48+
df = s.to_df("electrons")
5849
5950
# like before but with a new column "iteration" and all particles
6051
print(df)

docs/source/analysis/rapids.rst

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,7 @@ One can also combine all iterations in a single dataframe like this:
5151

5252
.. code-block:: python
5353
54-
cdf = cudf.concat(
55-
(
56-
cudf.from_pandas(s.iterations[i].particles["electrons"].to_df().assign(iteration=i))
57-
for i in s.iterations
58-
),
59-
axis=0,
60-
ignore_index=True,
61-
)
54+
cdf = s.to_cudf("electrons")
6255
6356
# like before but with a new column "iteration" and all particles
6457
print(cdf)

examples/11_particle_dataframe.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@
1616
except ImportError:
1717
print("pandas NOT found. Install pandas to run this example.")
1818
sys.exit()
19+
20+
found_cudf = False
21+
try:
22+
import cudf
23+
found_cudf = True
24+
except ImportError:
25+
print("cudf NOT found. Install RAPIDS for CUDA DataFrame example.")
26+
1927
found_dask = False
2028
try:
2129
import dask
@@ -39,6 +47,19 @@
3947
df = electrons.to_df(np.s_[:100])
4048
print(df)
4149

50+
# all particles over all steps
51+
df = s.to_df("electrons")
52+
print(df)
53+
54+
if found_cudf:
55+
# all particles - to GPU
56+
cdf = cudf.from_pandas(electrons.to_df())
57+
print(cdf)
58+
59+
# all particles over all steps - to GPU
60+
cdf = s.to_cudf("electrons")
61+
print(cdf)
62+
4263
# Particles
4364
if found_dask:
4465
# the default schedulers are local/threaded, not requiring much.

src/binding/python/openpmd_api/DataFrame.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,111 @@ def particles_to_dataframe(particle_species, slice=None):
7474
df.index.name = "row"
7575

7676
return df
77+
78+
79+
def iterations_to_dataframe(series, species_name):
    """
    Load all iterations of a particle species into a Pandas DataFrame.

    For every iteration key ``i`` obtained by iterating over
    ``series.iterations``, the species ``species_name`` is converted with
    its ``to_df()`` method, tagged with an ``iteration`` column holding
    ``i``, and all per-iteration frames are concatenated row-wise.

    Parameters
    ----------
    series : openpmd_api.Series
        A Series class in openPMD-api.
    species_name : string
        The name of a particle species. It is looked up in the particles
        of every iteration of the series.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe with openPMD record components of the
        particle species as columns. Particles might be repeated over
        multiple iterations and an "iteration" column is added. The
        index is renumbered over the concatenated result
        (``ignore_index=True``), so it does not restart per iteration.

    Raises
    ------
    ImportError
        Raises an exception if pandas is not installed. The original
        import failure is attached as the cause.

    See Also
    --------
    pandas.DataFrame : the central dataframe object created here
    """
    # import pandas here for a lazy import
    try:
        import pandas as pd
    except ImportError as err:
        # chain the original failure so the real import problem (e.g. a
        # broken install rather than a missing one) stays visible
        raise ImportError("pandas NOT found. Install pandas for DataFrame "
                          "support.") from err

    # NOTE(review): pandas.concat is expected to reject an empty sequence,
    # so a series without iterations presumably raises here - confirm.
    df = pd.concat(
        (
            series.iterations[i]
            .particles[species_name]
            .to_df()
            .assign(iteration=i)
            for i in series.iterations
        ),
        axis=0,
        ignore_index=True,
    )

    return df
127+
128+
129+
def iterations_to_cudf(series, species_name):
    """
    Load all iterations of a particle species into a cuDF DataFrame.

    For every iteration key ``i`` obtained by iterating over
    ``series.iterations``, the species ``species_name`` is converted with
    its ``to_df()`` method, tagged with an ``iteration`` column holding
    ``i``, converted via ``cudf.from_pandas`` and finally all
    per-iteration frames are concatenated row-wise with ``cudf.concat``.

    Parameters
    ----------
    series : openpmd_api.Series
        A Series class in openPMD-api.
    species_name : string
        The name of a particle species. It is looked up in the particles
        of every iteration of the series.

    Returns
    -------
    cudf.DataFrame
        A cuDF (RAPIDS) dataframe with openPMD record components of the
        particle species as columns. Particles might be repeated over
        multiple iterations and an "iteration" column is added. The
        concatenation is requested with ``ignore_index=True``.

    Raises
    ------
    ImportError
        Raises an exception if pandas or cuDF (RAPIDS) is not installed.
        The original import failure is attached as the cause.

    See Also
    --------
    cudf.DataFrame : the central dataframe object created here
    """
    # pandas is not used directly below; check for it up front so a
    # missing install fails early with a clear message (the per-iteration
    # frames are built as pandas dataframes before moving to cuDF)
    try:
        import pandas  # noqa
    except ImportError as err:
        raise ImportError("pandas NOT found. Install pandas for DataFrame "
                          "support.") from err
    # import cudf here for a lazy import
    try:
        import cudf
    except ImportError as err:
        raise ImportError("cudf NOT found. Install RAPIDS for CUDA DataFrame "
                          "support.") from err

    cdf = cudf.concat(
        (
            cudf.from_pandas(
                series.iterations[i]
                .particles[species_name]
                .to_df()
                .assign(iteration=i)
            )
            for i in series.iterations
        ),
        axis=0,
        ignore_index=True,
    )

    return cdf

src/binding/python/openpmd_api/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from . import openpmd_api_cxx as cxx
22
from .DaskArray import record_component_to_daskarray
33
from .DaskDataFrame import particles_to_daskdataframe
4-
from .DataFrame import particles_to_dataframe
4+
from .DataFrame import (iterations_to_cudf, iterations_to_dataframe,
5+
particles_to_dataframe)
56
from .openpmd_api_cxx import * # noqa
67

78
__version__ = cxx.__version__
@@ -13,6 +14,8 @@
1314
ParticleSpecies.to_df = particles_to_dataframe # noqa
1415
ParticleSpecies.to_dask = particles_to_daskdataframe # noqa
1516
Record_Component.to_dask_array = record_component_to_daskarray # noqa
17+
Series.to_df = iterations_to_dataframe # noqa
18+
Series.to_cudf = iterations_to_cudf # noqa
1619

1720
# TODO remove in future versions (deprecated)
1821
Access_Type = Access # noqa

0 commit comments

Comments
 (0)