Skip to content

Commit 4bb33ea

Browse files
authored
all other docstrings (#1783)
* all other docstrings
* docstrings improvements
* remove args in class docstrings
* put california housing dataset
* fix lfw dataset
* fix lfw dataset
* fix 20 news group
* fix cache
* fix housing
* fix housing
1 parent a7d8e5a commit 4bb33ea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+4146
-43
lines changed
Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,31 @@
11
import os
2+
import shutil
3+
import zipfile
4+
from pathlib import Path
25

36
from sklearn import datasets
47

58
data_home = datasets.get_data_home()
69
print("Path of download datasets to: ", data_home)
710

8-
print("Download California Housing dataset...")
9-
datasets.fetch_california_housing()
10-
print("Download LFW people dataset...")
11+
# print("Download California Housing dataset...")
12+
# datasets.fetch_california_housing()
13+
print("Unzip LFW people dataset...")
14+
15+
root_dir = Path(__file__).resolve().parents[2]
16+
lfw_zip_path = root_dir / "test_data" / "lfw-dataset.zip"
17+
lfw_home_path = Path(data_home) / "lfw_home"
18+
if not lfw_home_path.exists():
19+
with zipfile.ZipFile(lfw_zip_path, "r") as zip_ref:
20+
zip_ref.extractall(data_home)
21+
22+
1123
datasets.fetch_lfw_people()
12-
print("Download 20 news group dataset...")
24+
print("Copying 20 news group dataset...")
25+
shutil.copy(root_dir / "test_data" / "20news-bydate_py3.pkz", data_home)
1326
datasets.fetch_20newsgroups()
27+
print("Copying California Housing...")
28+
shutil.copy(root_dir / "test_data" / "cal_housing_py3.pkz", data_home)
29+
datasets.fetch_california_housing()
1430
print("Download completed.")
1531
print(f"Content of datasets cache: {os.listdir(data_home)}")

.github/share-actions/get-scipy-datasets/action.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
name: Get scipy datasets cached
2+
inputs:
3+
google_backend_sa_key:
4+
description: 'GCP service account key (base64 encoded)'
5+
required: true
26
runs:
37
using: "composite"
48
steps:
@@ -15,6 +19,20 @@ runs:
1519
uses: astral-sh/setup-uv@v7
1620
with:
1721
python-version: "3.11"
22+
- name: Save GCP SA key
23+
id: save_gcp_key
24+
env:
25+
GOOGLE_BACKEND_SA_KEY: ${{ inputs.google_backend_sa_key }}
26+
run: |
27+
echo "${GOOGLE_BACKEND_SA_KEY}" | base64 -d > gcp-credentials.json
28+
echo "gcp_credentials_path=$(pwd)/gcp-credentials.json" >> "$GITHUB_OUTPUT"
29+
shell: bash
30+
- name: DVC Pull
31+
if: ${{ steps.cache-scipy-data.outputs.cache-hit != 'true' }}
32+
env:
33+
GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.save_gcp_key.outputs.gcp_credentials_path }}
34+
run: uv run --with dvc[gs] dvc pull
35+
shell: bash
1836
- name: Download datasets
1937
if: ${{ steps.cache-scipy-data.outputs.cache-hit != 'true' }}
2038
env:

.github/workflows/main.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ jobs:
128128
uses: ./.github/share-actions/get-bikes-dataset-cached
129129
- name: 🔍 Get scipy dataset cached
130130
uses: ./.github/share-actions/get-scipy-datasets
131+
with:
132+
google_backend_sa_key: ${{ secrets.GOOGLE_BACKEND_SA_KEY }}
131133

132134
test-minimal:
133135
name: Test on minimal requirements

src/evidently/core/container.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,22 +23,53 @@
2323

2424

2525
class MetricContainer(AutoAliasMixin, EvidentlyBaseModel, abc.ABC):
26+
"""Base class for containers that generate multiple metrics.
27+
28+
Metric containers are used to programmatically create multiple related metrics,
29+
such as generating the same metric for multiple columns or creating metric combinations.
30+
Examples include `ColumnMetricGenerator` and preset classes like `DataDriftPreset`.
31+
"""
32+
2633
__alias_type__: ClassVar[str] = "metric_container"
2734

2835
class Config:
2936
is_base_type = True
3037

3138
include_tests: bool = True
39+
"""Whether to include default tests for generated metrics."""
3240

3341
def __init__(self, include_tests: bool = True, **data):
42+
"""Initialize a metric container.
43+
44+
Args:
45+
* `include_tests`: If `True`, generated metrics will include default tests.
46+
"""
3447
self.include_tests = include_tests
3548
super().__init__(**data)
3649

3750
@abc.abstractmethod
3851
def generate_metrics(self, context: "Context") -> Sequence[MetricOrContainer]:
52+
"""Generate metrics based on the container configuration.
53+
54+
Args:
55+
* `context`: `Context` containing datasets and configuration.
56+
57+
Returns:
58+
* Sequence of `Metric` or `MetricContainer` objects to compute.
59+
"""
3960
raise NotImplementedError()
4061

4162
def metrics(self, context: "Context") -> List[MetricOrContainer]:
63+
"""Get all metrics generated by this container.
64+
65+
Results are cached in the context to avoid regenerating on subsequent calls.
66+
67+
Args:
68+
* `context`: `Context` containing datasets and configuration.
69+
70+
Returns:
71+
* List of `Metric` or `MetricContainer` objects.
72+
"""
4273
metric_container_fp = self.get_fingerprint()
4374
metrics = context.metrics_container(metric_container_fp)
4475
if metrics is None:
@@ -51,9 +82,33 @@ def render(
5182
context: "Context",
5283
child_widgets: Optional[List[Tuple[Optional[MetricId], List[BaseWidgetInfo]]]] = None,
5384
) -> List[BaseWidgetInfo]:
85+
"""Render visualization widgets for this container.
86+
87+
Combines widgets from all child metrics/containers.
88+
89+
Args:
90+
* `context`: `Context` containing datasets and configuration.
91+
* `child_widgets`: Optional list of (metric_id, widgets) tuples from child metrics.
92+
93+
Returns:
94+
* List of `BaseWidgetInfo` objects for visualization.
95+
"""
5496
return list(itertools.chain(*[widget[1] for widget in (child_widgets or [])]))
5597

5698
def list_metrics(self, context: "Context") -> Generator[Metric, None, None]:
99+
"""Iterate over all leaf metrics in this container.
100+
101+
Recursively yields all `Metric` objects, flattening nested containers.
102+
103+
Args:
104+
* `context`: `Context` containing datasets and configuration.
105+
106+
Yields:
107+
* `Metric` objects from this container and nested containers.
108+
109+
Raises:
110+
* `ValueError`: If metrics haven't been generated yet, or if an item has an invalid metric type.
111+
"""
57112
metrics = context.metrics_container(self.get_fingerprint())
58113
if metrics is None:
59114
raise ValueError("Metrics weren't composed in container")
@@ -66,6 +121,14 @@ def list_metrics(self, context: "Context") -> Generator[Metric, None, None]:
66121
raise ValueError(f"invalid metric type {type(item)}")
67122

68123
def _get_tests(self, tests):
124+
"""Resolve the tests list, handling `None` and the `include_tests` flag.
125+
126+
Args:
127+
* `tests`: Optional list of tests.
128+
129+
Returns:
130+
* Converted tests list if `tests` is given; otherwise `None` if default tests should be used, or an empty list if tests are disabled.
131+
"""
69132
if tests is not None:
70133
return convert_tests(tests)
71134
if self.include_tests:
@@ -77,8 +140,21 @@ def _get_tests(self, tests):
77140

78141

79142
class ColumnMetricContainer(MetricContainer, abc.ABC):
143+
"""Base class for metric containers that operate on a specific column.
144+
145+
Simplifies container implementation for containers that generate metrics
146+
for a single column. Subclasses only need to implement `generate_metrics()`.
147+
"""
148+
80149
column: str
150+
"""Name of the column to generate metrics for."""
81151

82152
def __init__(self, column: str, include_tests: bool = True):
153+
"""Initialize a column metric container.
154+
155+
Args:
156+
* `column`: Name of the column to generate metrics for.
157+
* `include_tests`: If `True`, generated metrics will include default tests.
158+
"""
83159
self.column = column
84160
super().__init__(include_tests=include_tests)

0 commit comments

Comments
 (0)