Skip to content

Commit d9c8063

Browse files
authored
Merge pull request #40 from bcdev/forman-39-profiling
Added `profiling` configuration setting
2 parents 5099c3b + 39f2a72 commit d9c8063

File tree

14 files changed

+402
-67
lines changed

14 files changed

+402
-67
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
### Enhancements
44

5+
* It is now possible to configure runtime profiling of the `zappend`
6+
processing using the new configuration setting `profiling`. [#39]
7+
58
* Allow for passing custom slice sources via the configuration.
69
The new configuration setting `slice_source` is the name of a class
710
derived from `zappend.api.SliceSource` or a function that creates an instance

docs/config.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,49 @@ Type _boolean_.
187187
If 'true', log only what would have been done, but don't apply any changes.
188188
Defaults to `false`.
189189

190+
### `profiling`
191+
192+
Profiling configuration. Allows for runtime profiling of the processing.
193+
Must be one of the following:
194+
195+
* Type _boolean_.
196+
If set, profiling is enabled and output is logged using level INFO. Otherwise, profiling is disabled.
197+
198+
* Type _string_.
199+
Profile path. Enables profiling and writes a profiling report to given path.
200+
201+
* Type _object_.
202+
203+
* `enabled`:
204+
Type _boolean_.
205+
Enable or disable profiling.
206+
207+
* `path`:
208+
Type _string_.
209+
Local file path for profiling output.
210+
211+
* `log_level`:
212+
Log level. Use 'NOTSET' to disable logging.
213+
Defaults to `"INFO"`.
214+
Must be one of `"CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"`.
215+
216+
* `keys`:
217+
Type _array_.
218+
Sort output according to the supplied column names. Refer to [Stats.sort_stats(*keys)](https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats).
219+
Defaults to `["tottime"]`.
220+
221+
Must be one of `"calls", "cumulative", "cumtime", "file", "filename", "module", "ncalls", "pcalls", "line", "name", "nfl", "stdname", "time", "tottime"`.
222+
223+
* `restrictions`:
224+
Type _array_.
225+
Used to limit the list down to the significant entries in the profiling report. Refer to [Stats.print_stats(*restrictions)](https://docs.python.org/3/library/profile.html#pstats.Stats.print_stats).
226+
227+
Must be one of the following:
228+
* Type _integer_.
229+
* Type _number_.
230+
* Type _string_.
231+
232+
190233
### `logging`
191234

192235
Type _object_.

docs/guide.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,49 @@ zappend(["slice-1.nc", "slice-2.nc", "slice-3.nc"],
618618
slice_source=MySliceSource)
619619
```
620620

621+
## Profiling
622+
623+
Runtime profiling is very important for understanding program runtime behavior
624+
and performance. The configuration setting `profiling` can be used to
625+
analyse and improve the runtime performance of `zappend` itself as well as
626+
the runtime performance of the computation of in-memory slices passed to the
627+
`zappend()` function.
628+
629+
To log the output of the profiling with level `INFO` (see next section
630+
[Logging](#logging)), you can use the value `true`:
631+
632+
```json
633+
{
634+
"profiling": true
635+
}
636+
```
637+
638+
If you like to see the output in a file too, then set `profiling` to the
639+
desired local file path:
640+
641+
```json
642+
{
643+
"profiling": "perf.out"
644+
}
645+
```
646+
647+
You can also set `profiling` to an object that allows for fine-grained
648+
control of the runtime logging:
649+
650+
```json
651+
{
652+
"profiling": {
653+
"log_level": "WARNING",
654+
"path": "perf.out",
655+
"keys": ["tottime", "time", "ncalls"]
656+
}
657+
}
658+
```
659+
660+
Please refer to section [`profiling`](config.md#profiling) in
661+
the [Configuration Reference](config.md) for more details.
662+
663+
621664
## Logging
622665

623666
The `zappend` logging output is configured using the `logging` setting.

tests/test_api.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright © 2024 Norman Fomferra
22
# Permissions are hereby granted under the terms of the MIT License:
33
# https://opensource.org/licenses/MIT.
4-
4+
import os
55
import unittest
66

77
import xarray as xr
@@ -28,3 +28,29 @@ def test_some_slices(self):
2828
self.assertEqual({"time": 9, "y": 50, "x": 100}, ds.sizes)
2929
self.assertEqual({"chl", "tsm"}, set(ds.data_vars))
3030
self.assertEqual({"time", "y", "x"}, set(ds.coords))
31+
32+
def test_some_slices_with_profiling(self):
33+
slices = [
34+
"memory://slice-1.zarr",
35+
"memory://slice-2.zarr",
36+
"memory://slice-3.zarr",
37+
]
38+
for uri in slices:
39+
make_test_dataset(uri=uri)
40+
41+
target_dir = "memory://target.zarr"
42+
try:
43+
zappend(
44+
slices,
45+
config={
46+
"target_dir": target_dir,
47+
"profiling": {
48+
"path": "prof.out",
49+
"keys": ["tottime", "time", "ncalls"],
50+
},
51+
},
52+
)
53+
self.assertTrue(os.path.exists("prof.out"))
54+
finally:
55+
if os.path.exists("prof.out"):
56+
os.remove("prof.out")

tests/test_cli.py

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright © 2024 Norman Fomferra
22
# Permissions are hereby granted under the terms of the MIT License:
33
# https://opensource.org/licenses/MIT.
4-
import os
4+
55
import unittest
66
from click.testing import CliRunner
77

@@ -104,27 +104,3 @@ def test_some_slices_and_no_target(self):
104104
"Error: Missing 'target_dir' in configuration\n", result.output
105105
)
106106
self.assertFalse(FileObj("memory://target.zarr").exists())
107-
108-
def test_profiling(self):
109-
make_test_dataset(uri="memory://slice-1.zarr")
110-
make_test_dataset(uri="memory://slice-2.zarr")
111-
make_test_dataset(uri="memory://slice-3.zarr")
112-
113-
runner = CliRunner()
114-
# noinspection PyTypeChecker
115-
result = runner.invoke(
116-
zappend,
117-
[
118-
"--profiling",
119-
"prof.out",
120-
"--target",
121-
"memory://target.zarr",
122-
"memory://slice-1.zarr",
123-
"memory://slice-2.zarr",
124-
"memory://slice-3.zarr",
125-
],
126-
)
127-
self.assertEqual("", result.output)
128-
self.assertEqual(0, result.exit_code)
129-
self.assertTrue(FileObj("prof.out").exists())
130-
os.unlink("prof.out")

tests/test_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ def test_get_config_schema(self):
234234
"included_variables",
235235
"logging",
236236
"persist_mem_slices",
237+
"profiling",
237238
"slice_engine",
238239
"slice_polling",
239240
"slice_source",

tests/test_log.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# Copyright © 2024 Norman Fomferra
22
# Permissions are hereby granted under the terms of the MIT License:
33
# https://opensource.org/licenses/MIT.
4+
45
import logging
56
import unittest
6-
from click.testing import CliRunner
77

8-
from zappend.log import logger
98
from zappend.log import configure_logging
9+
from zappend.log import get_log_level
10+
from zappend.log import logger
1011

1112

1213
class LogTest(unittest.TestCase):
@@ -43,3 +44,13 @@ def test_configure_logging(self):
4344
logger.setLevel(old_level)
4445
for h in old_handlers:
4546
logger.addHandler(h)
47+
48+
def test_get_log_level(self):
49+
self.assertEqual(logging.DEBUG, get_log_level("DEBUG"))
50+
self.assertEqual(logging.INFO, get_log_level("INFO"))
51+
self.assertEqual(logging.WARNING, get_log_level("WARN"))
52+
self.assertEqual(logging.WARNING, get_log_level("WARNING"))
53+
self.assertEqual(logging.ERROR, get_log_level("ERROR"))
54+
self.assertEqual(logging.CRITICAL, get_log_level("CRITICAL"))
55+
self.assertEqual(logging.NOTSET, get_log_level("NOTSET"))
56+
self.assertEqual(logging.NOTSET, get_log_level("CRASS"))

tests/test_profiler.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright © 2024 Norman Fomferra
2+
# Permissions are hereby granted under the terms of the MIT License:
3+
# https://opensource.org/licenses/MIT.
4+
5+
import logging
6+
import os
7+
import unittest
8+
9+
from zappend.profiler import Profiler
10+
11+
12+
class ProfilerTest(unittest.TestCase):
13+
def test_disabled(self):
14+
profiler = Profiler(None)
15+
self.assert_profiler_config(profiler, False)
16+
with profiler:
17+
pass
18+
19+
profiler = Profiler(False)
20+
self.assert_profiler_config(profiler, False)
21+
with profiler:
22+
pass
23+
24+
profiler = Profiler({"enabled": False})
25+
self.assert_profiler_config(profiler, False)
26+
with profiler:
27+
pass
28+
29+
profiler = Profiler({"log_level": "NOTSET"})
30+
self.assert_profiler_config(profiler, False, "NOTSET")
31+
with profiler:
32+
pass
33+
34+
profiler = Profiler({"enabled": True, "log_level": "NOTSET"})
35+
self.assert_profiler_config(profiler, False, "NOTSET")
36+
with profiler:
37+
pass
38+
39+
def test_enabled(self):
40+
profiler = Profiler(True)
41+
self.assert_profiler_config(profiler, True, "INFO")
42+
with profiler:
43+
pass
44+
45+
profiler = Profiler("prof.out")
46+
self.assert_profiler_config(profiler, True, "INFO", "prof.out")
47+
try:
48+
with profiler:
49+
pass
50+
self.assertTrue(os.path.exists("prof.out"))
51+
finally:
52+
if os.path.exists("prof.out"):
53+
os.remove("prof.out")
54+
55+
profiler = Profiler({"path": "prof.out", "log_level": "DEBUG"})
56+
self.assert_profiler_config(profiler, True, "DEBUG", "prof.out")
57+
try:
58+
with profiler:
59+
pass
60+
self.assertTrue(os.path.exists("prof.out"))
61+
finally:
62+
if os.path.exists("prof.out"):
63+
os.remove("prof.out")
64+
65+
def assert_profiler_config(
66+
self,
67+
profiler: Profiler,
68+
expected_enabled,
69+
expected_log_level="INFO",
70+
expected_path=None,
71+
expected_keys=None,
72+
expected_restrictions=None,
73+
):
74+
if expected_keys is None:
75+
expected_keys = ["tottime"]
76+
if expected_restrictions is None:
77+
expected_restrictions = []
78+
self.assertEqual(expected_enabled, profiler.enabled)
79+
self.assertEqual(expected_log_level, profiler.log_level)
80+
self.assertEqual(expected_path, profiler.path)
81+
self.assertEqual(expected_keys, profiler.keys)
82+
self.assertEqual(expected_restrictions, profiler.restrictions)

zappend/api.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
# Copyright © 2024 Norman Fomferra
22
# Permissions are hereby granted under the terms of the MIT License:
33
# https://opensource.org/licenses/MIT.
4-
from cProfile import Profile
5-
from io import StringIO
6-
from pstats import Stats
7-
from typing import Iterable, Any, Literal
4+
5+
from typing import Any, Iterable
86

97
from .config import ConfigItem
108
from .config import ConfigLike
@@ -17,7 +15,6 @@
1715
from .slice import SliceSource
1816
from .slice import to_slice_factories
1917
from .slice import to_slice_factory
20-
from .log import logger
2118

2219

2320
__all__ = [
@@ -34,13 +31,10 @@
3431
"to_slice_factory",
3532
]
3633

37-
_TOTAL_TIME: Literal["tottime"] = "tottime"
38-
3934

4035
def zappend(
4136
slices: Iterable[SliceObj | SliceFactory],
4237
config: ConfigLike = None,
43-
profiling=None,
4438
**kwargs: Any,
4539
):
4640
"""
@@ -65,25 +59,8 @@ def zappend(
6559
May be a file path or URI, a ``dict``, ``None``, or a sequence of
6660
the aforementioned. If a sequence is used, subsequent configurations
6761
are incremental to the previous ones.
68-
profiling: Path for profiling output.
69-
Switches on profiling
7062
kwargs: Additional configuration parameters.
7163
Can be used to pass or override configuration values in *config*.
7264
"""
7365
processor = Processor(config, **kwargs)
74-
75-
if profiling:
76-
profile = Profile()
77-
logger.info(f"profiling ...")
78-
profile.enable()
79-
8066
processor.process_slices(slices)
81-
82-
if profiling:
83-
profile.disable()
84-
results = StringIO()
85-
stats = Stats(profile, stream=results)
86-
stats.sort_stats(_TOTAL_TIME).print_stats()
87-
with open(profiling, "w") as f:
88-
f.write(results.getvalue())
89-
logger.info(f"profiling output written to {profiling}")

zappend/cli.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
is_flag=True,
2929
help="Run the tool without creating, changing," " or deleting any files.",
3030
)
31-
@click.option("--profiling", help="Path for profiling output. Switches on profiling.")
3231
@click.option(
3332
"--help-config",
3433
metavar="json|md",
@@ -40,7 +39,6 @@ def zappend(
4039
config: tuple[str, ...],
4140
target: str | None,
4241
dry_run: bool,
43-
profiling: str,
4442
help_config: str | None,
4543
):
4644
"""Create or update a Zarr datacube TARGET from slice datasets SLICES.
@@ -63,13 +61,7 @@ def zappend(
6361

6462
# noinspection PyBroadException
6563
try:
66-
zappend(
67-
slices,
68-
config=config,
69-
target_dir=target,
70-
dry_run=dry_run,
71-
profiling=profiling,
72-
)
64+
zappend(slices, config=config, target_dir=target, dry_run=dry_run)
7365
except BaseException as e:
7466
raise click.ClickException(f"{e}") from e
7567

0 commit comments

Comments
 (0)