Skip to content

Commit ec41296

Browse files
committed
Merge remote-tracking branch 'origin/develop' into feature/served-pipelines
2 parents 38596be + fecb121 commit ec41296

File tree

16 files changed

+186
-22
lines changed

16 files changed

+186
-22
lines changed

.github/workflows/integration-test-fast-services.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ on:
2525
type: number
2626
required: false
2727
default: 30
28+
reruns:
29+
description: Pytest rerun count (0 disables)
30+
type: number
31+
required: false
32+
default: 3
2833
workflow_dispatch:
2934
inputs:
3035
os:
@@ -72,6 +77,11 @@ on:
7277
type: number
7378
required: false
7479
default: 30
80+
reruns:
81+
description: Pytest rerun count (0 disables)
82+
type: number
83+
required: false
84+
default: 3
7585
jobs:
7686
integration-tests-fast:
7787
name: integration-tests-fast
@@ -97,6 +107,7 @@ jobs:
97107
GCP_US_EAST4_SERVER_URL: ${{ secrets.GCP_US_EAST4_SERVER_URL }}
98108
GCP_US_EAST4_SERVER_USERNAME: ${{ secrets.GCP_US_EAST4_SERVER_USERNAME }}
99109
GCP_US_EAST4_SERVER_PASSWORD: ${{ secrets.GCP_US_EAST4_SERVER_PASSWORD }}
110+
PYTEST_RERUNS: ${{ inputs.reruns }}
100111
if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') }}
101112
defaults:
102113
run:

.github/workflows/integration-test-fast.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ on:
2525
type: number
2626
required: false
2727
default: 30
28+
reruns:
29+
description: Pytest rerun count (0 disables)
30+
type: number
31+
required: false
32+
default: 3
2833
workflow_dispatch:
2934
inputs:
3035
os:
@@ -72,6 +77,11 @@ on:
7277
type: number
7378
required: false
7479
default: 30
80+
reruns:
81+
description: Pytest rerun count (0 disables)
82+
type: number
83+
required: false
84+
default: 3
7585
jobs:
7686
integration-tests-fast:
7787
name: integration-tests-fast
@@ -97,6 +107,7 @@ jobs:
97107
GCP_US_EAST4_SERVER_URL: ${{ secrets.GCP_US_EAST4_SERVER_URL }}
98108
GCP_US_EAST4_SERVER_USERNAME: ${{ secrets.GCP_US_EAST4_SERVER_USERNAME }}
99109
GCP_US_EAST4_SERVER_PASSWORD: ${{ secrets.GCP_US_EAST4_SERVER_PASSWORD }}
110+
PYTEST_RERUNS: ${{ inputs.reruns }}
100111
if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') }}
101112
defaults:
102113
run:

.github/workflows/integration-test-slow-services.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ on:
2525
type: number
2626
required: false
2727
default: 30
28+
reruns:
29+
description: Pytest rerun count (0 disables)
30+
type: number
31+
required: false
32+
default: 3
2833
workflow_dispatch:
2934
inputs:
3035
os:
@@ -71,6 +76,11 @@ on:
7176
type: number
7277
required: false
7378
default: 30
79+
reruns:
80+
description: Pytest rerun count (0 disables)
81+
type: number
82+
required: false
83+
default: 3
7484
jobs:
7585
integration-tests-slow:
7686
name: integration-tests-slow
@@ -94,6 +104,7 @@ jobs:
94104
GCP_US_EAST4_SERVER_URL: ${{ secrets.GCP_US_EAST4_SERVER_URL }}
95105
GCP_US_EAST4_SERVER_USERNAME: ${{ secrets.GCP_US_EAST4_SERVER_USERNAME }}
96106
GCP_US_EAST4_SERVER_PASSWORD: ${{ secrets.GCP_US_EAST4_SERVER_PASSWORD }}
107+
PYTEST_RERUNS: ${{ inputs.reruns }}
97108
# TODO: add Windows testing for Python 3.11 and 3.12 back in
98109
# TODO: add macos testing back in
99110
if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.11') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.12') && ! (inputs.os == 'macos-13' || inputs.os == 'macos-latest') }}

.github/workflows/integration-test-slow.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ on:
2525
type: number
2626
required: false
2727
default: 30
28+
reruns:
29+
description: Pytest rerun count (0 disables)
30+
type: number
31+
required: false
32+
default: 3
2833
workflow_dispatch:
2934
inputs:
3035
os:
@@ -71,6 +76,11 @@ on:
7176
type: number
7277
required: false
7378
default: 30
79+
reruns:
80+
description: Pytest rerun count (0 disables)
81+
type: number
82+
required: false
83+
default: 3
7484
jobs:
7585
integration-tests-slow:
7686
name: integration-tests-slow
@@ -94,6 +104,7 @@ jobs:
94104
GCP_US_EAST4_SERVER_URL: ${{ secrets.GCP_US_EAST4_SERVER_URL }}
95105
GCP_US_EAST4_SERVER_USERNAME: ${{ secrets.GCP_US_EAST4_SERVER_USERNAME }}
96106
GCP_US_EAST4_SERVER_PASSWORD: ${{ secrets.GCP_US_EAST4_SERVER_PASSWORD }}
107+
PYTEST_RERUNS: ${{ inputs.reruns }}
97108
# TODO: add Windows testing for Python 3.11 and 3.12 back in
98109
if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.11') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.12') }}
99110
defaults:

.github/workflows/unit-test.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ on:
2626
type: string
2727
required: false
2828
default: ''
29+
reruns:
30+
description: Pytest rerun count (0 disables)
31+
type: number
32+
required: false
33+
default: 3
2934
workflow_dispatch:
3035
inputs:
3136
os:
@@ -51,6 +56,11 @@ on:
5156
type: string
5257
required: false
5358
default: ''
59+
reruns:
60+
description: Pytest rerun count (0 disables)
61+
type: number
62+
required: false
63+
default: 3
5464
jobs:
5565
unit-test:
5666
name: unit-test
@@ -61,6 +71,7 @@ jobs:
6171
PYTHONIOENCODING: utf-8
6272
UV_HTTP_TIMEOUT: 600
6373
OBJC_DISABLE_INITIALIZE_FORK_SAFETY: 'YES'
74+
PYTEST_RERUNS: ${{ inputs.reruns }}
6475
if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.11') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.12') }}
6576
defaults:
6677
run:

docs/book/component-guide/data-validators/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ Data Validator are optional stack components provided by integrations. The follo
3434
| [Deepchecks](deepchecks.md) | <p>data quality<br>data drift<br>model drift<br>model performance</p> | <p>tabular: <code>pandas.DataFrame</code><br>CV: <code>torch.utils.data.dataloader.DataLoader</code></p> | <p>tabular: <code>sklearn.base.ClassifierMixin</code><br>CV: <code>torch.nn.Module</code></p> | Add Deepchecks data and model validation tests to your pipelines | `deepchecks` |
3535
| [Evidently](evidently.md) | <p>data quality<br>data drift<br>model drift<br>model performance</p> | tabular: `pandas.DataFrame` | N/A | Use Evidently to generate a variety of data quality and data/model drift reports and visualizations | `evidently` |
3636
| [Great Expectations](great-expectations.md) | <p>data profiling<br>data quality</p> | tabular: `pandas.DataFrame` | N/A | Perform data testing, documentation and profiling with Great Expectations | `great_expectations` |
37-
| [Whylogs/WhyLabs](whylogs.md) | data drift | tabular: `pandas.DataFrame` | N/A | Generate data profiles with whylogs and upload them to WhyLabs | `whylogs` |
37+
| [Whylogs/WhyLabs](whylogs.md) | data drift | tabular: `pandas.DataFrame` | N/A | Generate data profiles with whylogs. The hosted WhyLabs platform is being discontinued after Apple's acquisition — see the integration page for OSS deployment options. | `whylogs` |
3838

3939
If you would like to see the available flavors of Data Validator, you can use the command:
4040

docs/book/component-guide/data-validators/whylogs.md

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ description: >-
66

77
# Whylogs
88

9-
The whylogs/WhyLabs [Data Validator](./) flavor provided with the ZenML integration uses [whylogs](https://whylabs.ai/whylogs) and [WhyLabs](https://whylabs.ai) to generate and track data profiles, highly accurate descriptive representations of your data. The profiles can be used to implement automated corrective actions in your pipelines, or to render interactive representations for further visual interpretation, evaluation and documentation.
9+
The whylogs/WhyLabs [Data Validator](./) flavor provided with the ZenML integration uses the open-source [whylogs](https://github.com/whylabs/whylogs) library together with the now open-sourced [WhyLabs platform](https://github.com/whylabs/whylabs-oss) to generate and track data profiles, highly accurate descriptive representations of your data. The profiles can be used to implement automated corrective actions in your pipelines, or to render interactive representations for further visual interpretation, evaluation and documentation.
10+
11+
> **Warning:** [WhyLabs was acquired by Apple](https://whylabs.ai/) and the hosted WhyLabs platform is being discontinued. While the whylogs library remains open source and the WhyLabs platform source code is publicly available, hosted deployments may no longer be accessible. Make sure to plan your usage of the integration accordingly and consider self-hosting the OSS platform if you still need WhyLabs features.
1012
1113
### When would you want to use it?
1214

13-
[Whylogs](https://whylabs.ai/whylogs) is an open-source library that analyzes your data and creates statistical summaries called whylogs profiles. Whylogs profiles can be processed in your pipelines and visualized locally or uploaded to the [WhyLabs platform](https://whylabs.ai/), where more in depth analysis can be carried out. Even though [whylogs also supports other data types](https://github.com/whylabs/whylogs#data-types), the ZenML whylogs integration currently only works with tabular data in `pandas.DataFrame` format.
15+
[Whylogs](https://github.com/whylabs/whylogs) is an open-source library that analyzes your data and creates statistical summaries called whylogs profiles. Whylogs profiles can be processed in your pipelines and visualized locally or uploaded to a WhyLabs deployment for more in depth analysis. The official hosted WhyLabs service is being discontinued, but you can continue to operate a WhyLabs instance yourself by using the open-source release at [https://github.com/whylabs/whylabs-oss](https://github.com/whylabs/whylabs-oss). Even though [whylogs also supports other data types](https://github.com/whylabs/whylogs#data-types), the ZenML whylogs integration currently only works with tabular data in `pandas.DataFrame` format.
1416

1517
You should use the whylogs/WhyLabs Data Validator when you need the following data validation features that are possible with whylogs and WhyLabs:
1618

@@ -28,7 +30,7 @@ The whylogs Data Validator flavor is included in the whylogs ZenML integration,
2830
zenml integration install whylogs -y
2931
```
3032

31-
If you don't need to connect to the WhyLabs platform to upload and store the generated whylogs data profiles, the Data Validator stack component does not require any configuration parameters. Adding it to a stack is as simple as running e.g.:
33+
If you don't need to connect to a WhyLabs deployment to upload and store the generated whylogs data profiles, the Data Validator stack component does not require any configuration parameters. Adding it to a stack is as simple as running e.g.:
3234

3335
```shell
3436
# Register the whylogs data validator
@@ -38,7 +40,7 @@ zenml data-validator register whylogs_data_validator --flavor=whylogs
3840
zenml stack register custom_stack -dv whylogs_data_validator ... --set
3941
```
4042

41-
Adding WhyLabs logging capabilities to your whylogs Data Validator is just slightly more complicated, as you also need to create a [ZenML Secret](https://docs.zenml.io/getting-started/deploying-zenml/secret-management) to store the sensitive WhyLabs authentication information in a secure location and then reference the secret in the Data Validator configuration. To generate a WhyLabs access token, you can follow [the official WhyLabs instructions documented here](https://docs.whylabs.ai/docs/whylabs-api/#creating-an-api-token) .
43+
Adding WhyLabs logging capabilities to your whylogs Data Validator is just slightly more complicated, as you also need to create a [ZenML Secret](https://docs.zenml.io/getting-started/deploying-zenml/secret-management) to store the sensitive WhyLabs authentication information in a secure location and then reference the secret in the Data Validator configuration. To generate a WhyLabs access token for a deployment that you host yourself, refer to the guidance in the [WhyLabs OSS repository](https://github.com/whylabs/whylabs-oss).
4244

4345
Then, you can register the whylogs Data Validator with WhyLabs logging capabilities as follows:
4446

@@ -53,7 +55,7 @@ zenml data-validator register whylogs_data_validator --flavor=whylogs \
5355
--authentication_secret=whylabs_secret
5456
```
5557

56-
You'll also need to enable whylabs logging for your custom pipeline steps if you want to upload the whylogs data profiles that they return as artifacts to the WhyLabs platform. This is enabled by default for the standard whylogs step. For custom steps, you can enable WhyLabs logging by setting the `upload_to_whylabs` parameter to `True` in the step configuration, e.g.:
58+
You'll also need to enable whylabs logging for your custom pipeline steps if you want to upload the whylogs data profiles that they return as artifacts to your WhyLabs deployment. This is enabled by default for the standard whylogs step. For custom steps, you can enable WhyLabs logging by setting the `upload_to_whylabs` parameter to `True` in the step configuration, e.g.:
5759

5860
```python
5961
from typing import Annotated
@@ -104,7 +106,7 @@ You can [visualize whylogs profiles](whylogs.md#visualizing-whylogs-profiles) in
104106

105107
#### The whylogs standard step
106108

107-
ZenML wraps the whylogs/WhyLabs functionality in the form of a standard `WhylogsProfilerStep` step. The only field in the step config is a `dataset_timestamp` attribute which is only relevant when you upload the profiles to WhyLabs that uses this field to group and merge together profiles belonging to the same dataset. The helper function `get_whylogs_profiler_step` used to create an instance of this standard step takes in an optional `dataset_id` parameter that is also used only in the context of WhyLabs upload to identify the model in the context of which the profile is uploaded, e.g.:
109+
ZenML wraps the whylogs/WhyLabs functionality in the form of a standard `WhylogsProfilerStep` step. The only field in the step config is a `dataset_timestamp` attribute, which is only relevant when you upload the profiles to a WhyLabs deployment, which uses this field to group and merge together profiles belonging to the same dataset. The helper function `get_whylogs_profiler_step` used to create an instance of this standard step takes in an optional `dataset_id` parameter that is used only for WhyLabs uploads, to identify the model in the context of which the profile is uploaded, e.g.:
108110

109111
```python
110112
from zenml.integrations.whylogs.steps import get_whylogs_profiler_step
@@ -149,7 +151,7 @@ You can view [the complete list of configuration parameters](https://sdkdocs.zen
149151

150152
The whylogs Data Validator implements the same interface as do all Data Validators, so this method forces you to maintain some level of compatibility with the overall Data Validator abstraction, which guarantees an easier migration in case you decide to switch to another Data Validator.
151153

152-
All you have to do is call the whylogs Data Validator methods when you need to interact with whylogs to generate data profiles. You may optionally enable whylabs logging to automatically upload the returned whylogs profile to WhyLabs, e.g.:
154+
All you have to do is call the whylogs Data Validator methods when you need to interact with whylogs to generate data profiles. You may optionally enable whylabs logging to automatically upload the returned whylogs profile to your WhyLabs deployment, e.g.:
153155

154156
```python
155157

@@ -191,7 +193,7 @@ def data_profiler(
191193
profile = data_validator.data_profiling(
192194
dataset,
193195
)
194-
# optionally upload the profile to WhyLabs, if WhyLabs credentials are configured
196+
# optionally upload the profile to your WhyLabs deployment, if WhyLabs credentials are configured
195197
data_validator.upload_profile_view(profile)
196198

197199
# validation post-processing (e.g. interpret results, take actions) can happen here
@@ -203,7 +205,7 @@ Have a look at [the complete list of methods and parameters available in the `Wh
203205

204206
#### Call whylogs directly
205207

206-
You can use the whylogs library directly in your custom pipeline steps, and only leverage ZenML's capability of serializing, versioning and storing the `DatasetProfileView` objects in its Artifact Store. You may optionally enable whylabs logging to automatically upload the returned whylogs profile to WhyLabs, e.g.:
208+
You can use the whylogs library directly in your custom pipeline steps, and only leverage ZenML's capability of serializing, versioning and storing the `DatasetProfileView` objects in its Artifact Store. You may optionally enable whylabs logging to automatically upload the returned whylogs profile to your WhyLabs deployment, e.g.:
207209

208210
```python
209211

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,9 @@ max-complexity = 18
271271
# Use Google-style docstrings.
272272
convention = "google"
273273

274+
[tool.bandit]
275+
skips = ["B615"]
276+
274277
[tool.mypy]
275278

276279
plugins = ["pydantic.mypy"]

scripts/check-security.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ SRC=${1:-"src/zenml tests examples"}
88
export ZENML_DEBUG=1
99
export ZENML_ANALYTICS_OPT_IN=false
1010

11-
bandit -r $SRC -ll \
11+
bandit -c pyproject.toml \
12+
-r $SRC -ll \
1213
--exclude examples/llm_finetuning/scripts/prepare_alpaca.py
13-

0 commit comments

Comments (0)