Skip to content

Commit c11e1ec

Browse files
committed
add secrets masking for github actions ci environments
1 parent 430adfb commit c11e1ec

File tree

1 file changed

+112
-5
lines changed

1 file changed

+112
-5
lines changed

airbyte_cdk/cli/airbyte_cdk/_secrets.py

Lines changed: 112 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,24 @@
2323
uvx airbyte-cdk secrets fetch ...
2424
```
2525
26-
The 'fetch' command retrieves secrets from Google Secret Manager based on connector
26+
The command retrieves secrets from Google Secret Manager based on connector
2727
labels and writes them to the connector's `secrets` directory.
2828
"""
2929

3030
from __future__ import annotations
3131

3232
import json
33+
import logging
3334
import os
35+
from functools import lru_cache
3436
from pathlib import Path
35-
from typing import cast
37+
from typing import Any, cast
3638

39+
import requests
3740
import rich_click as click
41+
import yaml
3842
from click import style
43+
from numpy import isin
3944
from rich.console import Console
4045
from rich.table import Table
4146

@@ -46,7 +51,9 @@
4651

4752
AIRBYTE_INTERNAL_GCP_PROJECT = "dataline-integration-testing"
4853
CONNECTOR_LABEL = "connector"
54+
GLOBAL_MASK_KEYS_URL = "https://connectors.airbyte.com/files/registries/v0/specs_secrets_mask.yaml"
4955

56+
logger = logging.getLogger("airbyte-cdk.cli.secrets")
5057

5158
try:
5259
from google.cloud import secretmanager_v1 as secretmanager
@@ -83,10 +90,18 @@ def secrets_cli_group() -> None:
8390
default=AIRBYTE_INTERNAL_GCP_PROJECT,
8491
help=f"GCP project ID. Defaults to '{AIRBYTE_INTERNAL_GCP_PROJECT}'.",
8592
)
93+
@click.option(
94+
"--print-ci-secrets-masks",
95+
help="Print GitHub CI mask for secrets.",
96+
type=bool,
97+
is_flag=True,
98+
default=False,
99+
)
86100
def fetch(
87101
connector_name: str | None = None,
88102
connector_directory: Path | None = None,
89103
gcp_project_id: str = AIRBYTE_INTERNAL_GCP_PROJECT,
104+
print_ci_secrets_masks: bool = False,
90105
) -> None:
91106
"""Fetch secrets for a connector from Google Secret Manager.
92107
@@ -96,8 +111,15 @@ def fetch(
96111
If no connector name or directory is provided, we will look within the current working
97112
directory. If the current working directory is not a connector directory (e.g. starting
98113
with 'source-') and no connector name or path is provided, the process will fail.
114+
115+
The `--print-ci-secrets-masks` option will print the GitHub CI mask for the secrets.
116+
This is useful for masking secrets in CI logs.
117+
118+
WARNING: This action causes the secrets to be printed in clear text to the logs. For security
119+
reasons, this function will only execute if the `CI` environment variable is set. Otherwise,
120+
masks will not be printed.
99121
"""
100-
click.echo("Fetching secrets...")
122+
click.echo("Fetching secrets...", err=True)
101123

102124
client = _get_gsm_secrets_client()
103125
connector_name, connector_directory = resolve_connector_name_and_directory(
@@ -125,7 +147,7 @@ def fetch(
125147
client=client,
126148
file_path=secret_file_path,
127149
)
128-
click.echo(f"Secret written to: {secret_file_path.absolute()!s}")
150+
click.echo(f"Secret written to: {secret_file_path.absolute()!s}", err=True)
129151
secret_count += 1
130152

131153
if secret_count == 0:
@@ -134,6 +156,23 @@ def fetch(
134156
err=True,
135157
)
136158

159+
if not print_ci_secrets_masks:
160+
return
161+
162+
if not os.environ.get("CI", None):
163+
click.echo(
164+
"The `--print-ci-secrets-masks` option is only available in CI environments. "
165+
"The `CI` env var is either not set or not set to a truthy value. "
166+
"Skipping printing secret masks.",
167+
err=True,
168+
)
169+
return
170+
171+
# Else print the CI mask
172+
_print_ci_secrets_masks(
173+
secrets_dir=secrets_dir,
174+
)
175+
137176

138177
@secrets_cli_group.command("list")
139178
@click.option(
@@ -166,7 +205,7 @@ def list_(
166205
directory. If the current working directory is not a connector directory (e.g. starting
167206
with 'source-') and no connector name or path is provided, the process will fail.
168207
"""
169-
click.echo("Fetching secrets...")
208+
click.echo("Scanning secrets...", err=True)
170209

171210
connector_name = connector_name or resolve_connector_name(
172211
connector_directory=connector_directory or Path().resolve().absolute(),
@@ -310,3 +349,71 @@ def _get_gsm_secrets_client() -> "secretmanager.SecretManagerServiceClient": #
310349
json.loads(credentials_json)
311350
),
312351
)
352+
353+
354+
def _print_ci_secrets_masks(
    secrets_dir: Path,
) -> None:
    """Print GitHub CI masks for every secret config file in `secrets_dir`.

    https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#example-masking-an-environment-variable

    Each `*.json` file directly inside `secrets_dir` is parsed and passed to
    `_print_ci_secrets_masks_for_config`, which writes the mask commands to STDOUT.

    Printing a mask necessarily prints the secret itself in clear text, so nothing is
    emitted unless the `CI` env var is set to a truthy value (GitHub Actions sets it).
    Environment variables are always strings, so the common "off" spellings
    (`0`, `false`, `no`, `off`, empty) are treated as falsy rather than relying on plain
    string truthiness, which would accept `CI=false`.

    Args:
        secrets_dir: Directory containing the fetched secret config files.
    """
    ci_flag = os.environ.get("CI", "").strip().lower()
    if ci_flag in {"", "0", "false", "no", "off"}:
        click.echo(
            "The `--print-ci-secrets-masks` option is only available in CI environments. "
            "The `CI` env var is either not set or not set to a truthy value. "
            "Skipping printing secret masks.",
            err=True,
        )
        return

    # Sorted so the emitted commands appear in a stable order across runs.
    for secret_file_path in sorted(secrets_dir.glob("*.json")):
        # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
        config_dict = json.loads(secret_file_path.read_text(encoding="utf-8"))
        _print_ci_secrets_masks_for_config(config=config_dict)
377+
378+
379+
def _print_ci_secrets_masks_for_config(
    config: dict[str, str] | list | Any,
) -> None:
    """Print GitHub CI mask for secrets config, navigating child nodes recursively.

    Lists are walked item by item. For dicts, every key accepted by `_is_secret_property`
    has its value masked, and nested dicts/lists are then walked as well. Any other node
    type is ignored.

    Mask commands are written one per line of the secret. GitHub reads workflow commands
    line by line, so printing a multi-line secret (for example a PEM key) in a single
    `::add-mask::` command would leave every line after the first unmasked and visible in
    the log. Blank lines are skipped because an empty mask is meaningless.

    Args:
        config: A parsed JSON node (dict, list, or scalar) from a secrets file.
    """
    if isinstance(config, list):
        for item in config:
            _print_ci_secrets_masks_for_config(item)
        return

    if not isinstance(config, dict):
        return

    for key, value in config.items():
        if _is_secret_property(key):
            logger.debug(f"Masking secret for config key: {key}")
            renderings = [str(value)]
            if isinstance(value, dict):
                # For nested dicts, we also need to mask the json-stringified version
                renderings.append(json.dumps(value))

            for rendering in renderings:
                # Normalise CR / CRLF so each physical line gets its own command.
                for line in rendering.replace("\r", "\n").split("\n"):
                    if line.strip():
                        print(f"::add-mask::{line}")

        if isinstance(value, (dict, list)):
            _print_ci_secrets_masks_for_config(config=value)
398+
399+
400+
def _is_secret_property(property_name: str) -> bool:
    """Check whether a config property name should have its value masked.

    The check is case-insensitive and matches any property name that *contains* one of
    the mask entries returned by `_get_spec_mask`, so a rule such as "password" also
    covers "PASSWORD" and "my_password". Matching in this direction (mask inside the
    property name) avoids false negatives for prefixed/suffixed keys, and avoids treating
    a short key such as "id" as secret merely because it is a fragment of some mask entry.

    Args:
        property_name: The config key to test.

    Returns:
        True if the property name contains any mask entry, otherwise False.
    """
    normalized_name = property_name.lower()
    # Empty mask entries are skipped: "" is a substring of every name.
    return any(mask and mask.lower() in normalized_name for mask in _get_spec_mask())
407+
408+
409+
@lru_cache
def _get_spec_mask() -> list[str]:
    """Get the list of properties to mask from the spec mask file.

    Downloads the YAML document at `GLOBAL_MASK_KEYS_URL` and returns its `properties`
    entry. The result is memoized by `lru_cache`, so the file is fetched at most once per
    process after a successful call; callers share the same list object and should not
    mutate it.

    Returns:
        The property names listed under `properties` in the spec mask file.

    Raises:
        requests.RequestException: If the request fails, times out, or returns an HTTP
            error status.
        Exception: Re-raised unchanged if the response cannot be parsed or has no
            `properties` entry.
    """
    # A timeout keeps an unresponsive registry from hanging the CI job indefinitely.
    response = requests.get(GLOBAL_MASK_KEYS_URL, allow_redirects=True, timeout=30)
    if not response.ok:
        logger.error(f"Failed to fetch spec mask: {response.content}")
        # Fail with the HTTP error rather than a misleading parse error on the error body.
        response.raise_for_status()

    try:
        return cast(list[str], yaml.safe_load(response.content)["properties"])
    except Exception as e:
        logger.error(f"Failed to parse spec mask: {e}")
        raise

0 commit comments

Comments
 (0)