Skip to content

Commit 6618fda

Browse files
authored
chore: add github automations (#1)
Add dependabot configuration to manage the GitHub Action versions. Add CI/CD workflows to verify and publish the package using semantic commits. Add linting and fix typing issues. Signed-off-by: Lucas Roesler <[email protected]>
1 parent b7fb965 commit 6618fda

File tree

14 files changed

+330
-22
lines changed

14 files changed

+330
-22
lines changed

.flake8

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[flake8]
2+
count = true
3+
max-line-length = 127
4+
max-complexity = 10
5+
statistics = true
6+
# stop the build if there are Python syntax errors or undefined names
7+
select = E9,F63,F7,F82
8+
show-source = true

.github/dependabot.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Set update schedule for GitHub Actions
2+
3+
version: 2
4+
updates:
5+
- package-ecosystem: "github-actions"
6+
directory: "/"
7+
schedule:
8+
# Check for updates to GitHub Actions every week
9+
interval: "weekly"
10+
commit-message:
11+
prefix: ci

.github/workflows/cicd.yaml

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
name: "CI/CD"
2+
3+
on:
4+
release:
5+
types: [published]
6+
push:
7+
branches:
8+
- main
9+
pull_request:
10+
types:
11+
- synchronize
12+
- opened
13+
- reopened
14+
15+
jobs:
16+
test:
17+
runs-on: ubuntu-latest
18+
name: Test
19+
services:
20+
# Label used to access the service container
21+
postgres:
22+
# Docker Hub image
23+
image: postgres
24+
# Provide the password for postgres
25+
env:
26+
POSTGRES_PASSWORD: postgres_pass
27+
# Set health checks to wait until postgres has started
28+
options: >-
29+
--health-cmd pg_isready
30+
--health-interval 10s
31+
--health-timeout 5s
32+
--health-retries 5
33+
ports:
34+
# Maps tcp port 5432 on service container to the host
35+
- 5432:5432
36+
env:
37+
ENV: cicd
38+
steps:
39+
- name: Checkout
40+
uses: actions/checkout@v3
41+
with:
42+
fetch-depth: 0
43+
- name: Get Metadata
44+
id: metadata
45+
uses: contiamo/git-metadata-action@main
46+
- name: Setup Task
47+
uses: arduino/setup-task@v1
48+
49+
- name: Setup Python
50+
uses: actions/setup-python@v2
51+
with:
52+
python-version: 3.9
53+
54+
- uses: Gr1N/setup-poetry@v8
55+
- uses: actions/cache@v2
56+
with:
57+
path: ~/.cache/pypoetry/virtualenvs
58+
key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
59+
60+
- name: Setup package
61+
run: task setup
62+
63+
- name: Setup database
64+
run: task sample-view
65+
66+
- name: Test package
67+
run: task run
68+
69+
publish:
70+
# Only run for published release events:
71+
if: github.event_name == 'release'
72+
needs: [test]
73+
runs-on: ubuntu-latest
74+
name: Build and Publish package
75+
steps:
76+
- name: Checkout
77+
uses: actions/checkout@v3
78+
with:
79+
fetch-depth: 0
80+
- name: Get Metadata
81+
id: metadata
82+
uses: contiamo/git-metadata-action@main
83+
- name: Setup Task
84+
uses: arduino/setup-task@v1
85+
86+
- name: Setup Python
87+
uses: actions/setup-python@v2
88+
with:
89+
python-version: 3.9
90+
91+
- uses: Gr1N/setup-poetry@v8
92+
- uses: actions/cache@v2
93+
with:
94+
path: ~/.cache/pypoetry/virtualenvs
95+
key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
96+
97+
- name: Setup package
98+
run: task setup
99+
100+
- name: Build package
101+
run: task build
102+
103+
- name: Publish package
104+
run: task publish

.github/workflows/commit-titles.yaml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: "Conventional commit titles"
2+
on:
3+
pull_request:
4+
types:
5+
# Check title when opened.
6+
- opened
7+
# Check title when new commits are pushed.
8+
# Required to use as a status check.
9+
- synchronize
10+
# When the title or description change
11+
- edited
12+
13+
jobs:
14+
validate:
15+
runs-on: ubuntu-latest
16+
steps:
17+
- uses: actions/github-script@v6
18+
env:
19+
# Ensure pull request titles match the Conventional Commits specification
20+
# https://www.conventionalcommits.org/en/v1.0.0/
21+
regex: '^(feat|fix|chore|ci|refactor|test|docs)(\(.*\))?!?:'
22+
with:
23+
script: |
24+
if (context.eventName != "pull_request") {
25+
core.setFailed("This action only works on pull_request events");
26+
return;
27+
}
28+
core.info(`Checking pull request title with regex: ${process.env.regex}`);
29+
const regex = RegExp(process.env.regex);
30+
const {data: pullRequest} = await github.rest.pulls.get({
31+
owner: context.repo.owner,
32+
repo: context.repo.repo,
33+
pull_number: context.payload.pull_request.number
34+
});
35+
const title = pullRequest.title;
36+
core.info(`Pull Request title: "${title}"`);
37+
if (!regex.test(title)) {
38+
core.setFailed(`Pull Request title "${title}" failed to pass match regex - ${regex}`);
39+
return;
40+
}

.github/workflows/release-please.yaml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
on:
2+
push:
3+
branches:
4+
- main
5+
6+
name: release-please
7+
jobs:
8+
release-please:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- uses: GoogleCloudPlatform/[email protected]
12+
id: release
13+
with:
14+
# use the CI token so the release PR is not attributed to an action
15+
token: ${{ secrets.CONTIAMO_CI_TOKEN }}
16+
release-type: python
17+
package-name: ""
18+
bump-minor-pre-major: true
19+
changelog-types: |
20+
[
21+
{"type":"feat","section":"Features","hidden":false},
22+
{"type":"fix","section":"Bug Fixes","hidden":false},
23+
{"type":"chore","section":"Miscellaneous","hidden":false},
24+
{"type":"docs","section":"Miscellaneous","hidden":false},
25+
{"type":"refactor","section":"Miscellaneous","hidden":false}
26+
]
27+
28+
- name: Debug release output
29+
env:
30+
OUTPUTS: ${{ toJSON(steps.release.outputs) }}
31+
run: echo '$OUTPUTS'

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
__pycache__
22
.venv
33
.pytest_cache
4+
.env
5+
dist/
6+
.mypy_cache

Taskfile.yaml

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,39 @@ tasks:
2323
cmds:
2424
- poetry install
2525

26+
build:
27+
desc: build the python package
28+
cmds:
29+
- poetry build
30+
31+
publish:
32+
desc: publish the python package
33+
cmds:
34+
- poetry publish
35+
36+
flake:
37+
internal: true
38+
cmds:
39+
- poetry run flake8 datahub_postgres_lineage
40+
41+
format:
42+
internal: true
43+
cmds:
44+
- poetry run black --check .
45+
# - poetry run isort --check-only .
46+
47+
typings:
48+
internal: true
49+
cmds:
50+
- poetry run mypy .
51+
52+
lint:
53+
desc: lint the python package
54+
deps:
55+
- flake
56+
- format
57+
- typings
58+
2659
wait:
2760
cmds:
2861
- sleep {{.SLEEP}}
@@ -71,7 +104,7 @@ tasks:
71104
sample-view:
72105
desc: create a sample view
73106
cmds:
74-
- psql -f env/local/sample.sql
107+
- psql -f {{.CONFIG_DIR}}/{{.ENV}}/sample.sql
75108
status:
76109
- psql -At -c "select 1 from information_schema.views where table_name = 'names'" | grep "1"
77110
- psql -At -c "select 1 from information_schema.views where table_name = 'emails'" | grep "1"

datahub_postgres_lineage/ingestion.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,38 @@
11
import logging
22
from contextlib import contextmanager
33
from dataclasses import dataclass, field
4-
from typing import Any, Dict, Iterable, List, Optional
4+
from typing import (
5+
Any,
6+
Dict,
7+
Iterable,
8+
Iterator,
9+
List,
10+
Optional,
11+
)
512

613
from datahub.configuration.common import AllowDenyPattern
714
from datahub.emitter import mce_builder
815
from datahub.emitter.mcp_builder import mcps_from_mce
16+
from datahub.ingestion.api.common import PipelineContext
917
from datahub.ingestion.api.decorators import (
1018
SupportStatus,
1119
config_class,
1220
platform_name,
1321
support_status,
1422
)
15-
from datahub.ingestion.api.common import PipelineContext
16-
from datahub.ingestion.api.source import TestableSource, SourceReport
23+
from datahub.ingestion.api.source import SourceReport, TestableSource
1724
from datahub.ingestion.api.workunit import MetadataWorkUnit
1825
from datahub.ingestion.source.sql.postgres import PostgresConfig
19-
from datahub.ingestion.source.sql.sql_common import (
20-
make_sqlalchemy_uri,
21-
)
26+
from datahub.ingestion.source.sql.sql_common import make_sqlalchemy_uri
2227
from datahub.ingestion.source.state.stateful_ingestion_base import (
2328
StatefulIngestionConfigBase,
2429
StatefulIngestionSourceBase,
2530
)
2631
from datahub.utilities.lossy_collections import LossyList
27-
2832
from pydantic import BaseModel, Field, SecretStr
29-
3033
from sqlalchemy import create_engine
31-
from sqlalchemy.engine import Connection, CursorResult
34+
from sqlalchemy.engine import Connection
35+
from sqlalchemy.engine.cursor import CursorResult
3236

3337
logger: logging.Logger = logging.getLogger(__name__)
3438

@@ -89,6 +93,8 @@ def report_dropped(self, ent_name: str) -> None:
8993

9094

9195
class PostgresLineageConfig(StatefulIngestionConfigBase):
96+
options: dict = {}
97+
9298
username: Optional[str] = Field(default=None, description="username")
9399
password: Optional[SecretStr] = Field(
94100
default=None, exclude=True, description="password"
@@ -140,6 +146,9 @@ def get_sql_alchemy_url(self, uri_opts: Optional[Dict[str, Any]] = None) -> str:
140146
@config_class(PostgresLineageConfig)
141147
@support_status(SupportStatus.TESTING)
142148
class PostgresLineageSource(StatefulIngestionSourceBase, TestableSource):
149+
config: PostgresLineageConfig # type: ignore
150+
report: LineageSourceReport # type: ignore
151+
143152
def __init__(self, config: PostgresLineageConfig, ctx: PipelineContext):
144153
super().__init__(config, ctx)
145154
self.platform = "postgres"
@@ -149,7 +158,7 @@ def __init__(self, config: PostgresLineageConfig, ctx: PipelineContext):
149158
### Start required abstract class methods
150159
@classmethod
151160
def create(cls, config_dict, ctx):
152-
config = PostgresConfig.parse_obj(config_dict)
161+
config = PostgresLineageConfig.parse_obj(config_dict)
153162
return cls(config, ctx)
154163

155164
def get_platform_instance_id(self) -> str:
@@ -195,7 +204,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
195204
if len(data) == 0:
196205
return None
197206

198-
lineage_elements = {}
207+
lineage_elements: Dict[str, List[str]] = {}
199208
# Loop over the lineages in the JSON data.
200209
for lineage in data:
201210

@@ -221,12 +230,9 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
221230
lineage_elements[key].append(
222231
mce_builder.make_dataset_urn(
223232
"postgres",
224-
".".join(
225-
[
226-
self.config.database_alias or self.config.database,
227-
lineage.source_schema,
228-
lineage.source_table,
229-
]
233+
self.config.get_identifier(
234+
lineage.source_schema,
235+
lineage.source_table,
230236
),
231237
self.config.env,
232238
)
@@ -240,7 +246,10 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
240246
# Construct a lineage object.
241247
urn = mce_builder.make_dataset_urn(
242248
"postgres",
243-
".".join([self.config.database, dependent_schema, dependent_view]),
249+
self.config.get_identifier(
250+
lineage.dependent_schema,
251+
lineage.dependent_view,
252+
),
244253
self.config.env,
245254
)
246255

@@ -257,7 +266,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
257266
yield wu
258267

259268
@contextmanager
260-
def _get_connection(self) -> Connection:
269+
def _get_connection(self) -> Iterator[Connection]:
261270
# This method can be overridden in the case that you want to dynamically
262271
# run on multiple databases.
263272

env/cicd/.env

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
PGHOST=localhost
2+
PGPORT=5432
3+
PGDATABASE=postgres
4+
PGUSER=postgres
5+
PGPASSWORD=postgres_pass
6+
7+
DATAHUB_TELEMETRY_ENABLED="false"

0 commit comments

Comments
 (0)