Skip to content

Commit b8bc5bd

Browse files
TiGribpre-commit-ci[bot]dolfinus
authored
[DOP-24001] Migrate from flake8 to ruff (#168)
* [DOP-24001] ruff check over data_rentgen/ folder * [DOP-24001] update github workflow * [DOP-24001] update github workflow * [DOP-24001] Add some ignores * [DOP-24001] fixes * [DOP-24001] mypy fixes * [DOP-24001] ruff fixes * [DOP-24001] move ruff from actions to precommit-ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [DOP-24001] fix too long line * [DOP-24001] specify dir in precommit * [DOP-24001] ruff over tests * Update data_rentgen/consumer/extractors/dataset.py warning message Co-authored-by: Maxim Martynov <msmarty5@mts.ru> * [DOP-24001] ruff over tests * [DOP-24001] ruff over tests --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maxim Martynov <msmarty5@mts.ru>
1 parent ce4efa5 commit b8bc5bd

File tree

145 files changed

+875
-817
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

145 files changed

+875
-817
lines changed

.github/workflows/codeql-analysis.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,9 @@ jobs:
5151
run: |
5252
poetry install --no-root --all-extras --with dev --without docs,test
5353
54-
- name: Run flake8
55-
run: poetry run flake8 data_rentgen/
56-
5754
- name: Run mypy
5855
run: poetry run mypy ./data_rentgen --config-file ./pyproject.toml
5956

60-
6157
codeql:
6258
name: CodeQL
6359
runs-on: ubuntu-latest
@@ -75,7 +71,7 @@ jobs:
7571
with:
7672
python-version: ${{ env.DEFAULT_PYTHON }}
7773

78-
# Initializes the CodeQL tools for scanning.
74+
# Initializes the CodeQL tools for scanning.
7975
- name: Initialize CodeQL
8076
uses: github/codeql-action/init@v3
8177
with:

.pre-commit-config.yaml

Lines changed: 5 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -63,23 +63,12 @@ repos:
6363
hooks:
6464
- id: add-trailing-comma
6565

66-
- repo: https://github.com/pycqa/autoflake
67-
rev: v2.3.1
66+
- repo: https://github.com/astral-sh/ruff-pre-commit
67+
rev: v0.9.2
6868
hooks:
69-
- id: autoflake
70-
args: [--in-place]
71-
72-
- repo: https://github.com/psf/black-pre-commit-mirror
73-
rev: 25.1.0
74-
hooks:
75-
- id: black
76-
77-
- repo: https://github.com/asottile/blacken-docs
78-
rev: 1.19.1
79-
hooks:
80-
- id: blacken-docs
81-
additional_dependencies:
82-
- black==24.8.0
69+
- id: ruff
70+
args: [--fix]
71+
- id: ruff-format
8372

8473
- repo: https://github.com/pycqa/bandit
8574
rev: 1.8.3
@@ -91,12 +80,6 @@ repos:
9180
- -ll
9281
require_serial: true
9382

94-
- repo: https://github.com/pycqa/isort
95-
rev: 6.0.0
96-
hooks:
97-
- id: isort
98-
name: isort
99-
10083
- repo: https://github.com/IamTheFij/docker-pre-commit
10184
rev: v3.0.1
10285
hooks:
@@ -128,14 +111,6 @@ repos:
128111

129112
- repo: local
130113
hooks:
131-
- id: flake8
132-
name: flake8
133-
entry: flake8
134-
language: python
135-
types: [python]
136-
files: ^data_rentgen/.*$
137-
pass_filenames: true
138-
139114
- id: mypy
140115
name: mypy
141116
entry: mypy ./data_rentgen --config-file ./pyproject.toml
@@ -151,7 +126,6 @@ repos:
151126

152127
ci:
153128
skip:
154-
- flake8 # checked with Github Actions
155129
- mypy # checked with Github Actions
156130
- docker-compose-check # cannot run on pre-commit.ci
157131
- chmod # failing in pre-commit.ci

data_rentgen/consumer/extractors/__init__.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,17 @@
1616
from data_rentgen.consumer.extractors.schema import extract_schema
1717

1818
__all__ = [
19+
"BatchExtractionResult",
20+
"connect_dataset_with_symlinks",
21+
"extract_batch",
1922
"extract_column_lineage",
20-
"extract_dataset_and_symlinks",
2123
"extract_dataset",
22-
"connect_dataset_with_symlinks",
24+
"extract_dataset_and_symlinks",
25+
"extract_input",
2326
"extract_job",
24-
"extract_run",
25-
"extract_run_minimal",
2627
"extract_operation",
27-
"extract_input",
2828
"extract_output",
29+
"extract_run",
30+
"extract_run_minimal",
2931
"extract_schema",
30-
"extract_batch",
31-
"BatchExtractionResult",
3232
]

data_rentgen/consumer/extractors/batch.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
)
4242

4343

44-
class BatchExtractionResult: # noqa: WPS338, WPS214
44+
class BatchExtractionResult:
4545
"""Track results of batch extraction.
4646
4747
Calling any ``add_*`` method will add DTO item to the result, including nested DTOs,
@@ -74,7 +74,7 @@ def __init__(self):
7474

7575
def __repr__(self):
7676
return (
77-
"ExtractionResult(" # noqa: WPS237
77+
"ExtractionResult("
7878
f"locations={len(self._locations)}, "
7979
f"datasets={len(self._datasets)}, "
8080
f"dataset_symlinks={len(self._dataset_symlinks)}, "
@@ -90,7 +90,7 @@ def __repr__(self):
9090
)
9191

9292
@staticmethod
93-
def _add(context: dict[tuple, T], new_item: T) -> dict[tuple, T]: # noqa: WPS602
93+
def _add(context: dict[tuple, T], new_item: T) -> dict[tuple, T]:
9494
key = new_item.unique_key
9595
if key in context:
9696
old_item = context[key]
@@ -130,12 +130,12 @@ def add_operation(self, operation: OperationDTO):
130130
self._add(self._operations, operation)
131131
self.add_run(operation.run)
132132

133-
def add_input(self, input: InputDTO):
134-
self._add(self._inputs, input)
135-
self.add_operation(input.operation)
136-
self.add_dataset(input.dataset)
137-
if input.schema:
138-
self.add_schema(input.schema)
133+
def add_input(self, input_: InputDTO):
134+
self._add(self._inputs, input_)
135+
self.add_operation(input_.operation)
136+
self.add_dataset(input_.dataset)
137+
if input_.schema:
138+
self.add_schema(input_.schema)
139139

140140
def add_output(self, output: OutputDTO):
141141
self._add(self._outputs, output)
@@ -196,12 +196,12 @@ def _get_operation(self, operation_key: tuple) -> OperationDTO:
196196
return operation
197197

198198
def _get_input(self, input_key: tuple) -> InputDTO:
199-
input = self._inputs[input_key]
200-
input.operation = self._get_operation(input.operation.unique_key)
201-
input.dataset = self._get_dataset(input.dataset.unique_key)
202-
if input.schema:
203-
input.schema = self._get_schema(input.schema.unique_key)
204-
return input
199+
input_ = self._inputs[input_key]
200+
input_.operation = self._get_operation(input_.operation.unique_key)
201+
input_.dataset = self._get_dataset(input_.dataset.unique_key)
202+
if input_.schema:
203+
input_.schema = self._get_schema(input_.schema.unique_key)
204+
return input_
205205

206206
def _get_output(self, output_key: tuple) -> OutputDTO:
207207
output = self._outputs[output_key]
@@ -252,23 +252,23 @@ def users(self) -> list[UserDTO]:
252252
return list(map(self._get_user, self._users))
253253

254254

255-
def extract_batch(events: list[OpenLineageRunEvent]) -> BatchExtractionResult: # noqa: WPS231
255+
def extract_batch(events: list[OpenLineageRunEvent]) -> BatchExtractionResult:
256256
result = BatchExtractionResult()
257257

258258
for event in events:
259259
if event.job.facets.jobType and event.job.facets.jobType.jobType == OpenLineageJobType.JOB:
260260
operation = extract_operation(event)
261261
result.add_operation(operation)
262262
for input_dataset in event.inputs:
263-
input, symlinks = extract_input(operation, input_dataset)
264-
result.add_input(input)
263+
input_, symlinks = extract_input(operation, input_dataset)
264+
result.add_input(input_)
265265
for symlink in symlinks:
266266
result.add_dataset_symlink(symlink)
267267

268268
for output_dataset in event.outputs:
269269
output, symlinks = extract_output(operation, output_dataset)
270270
result.add_output(output)
271-
for symlink in symlinks: # noqa: WPS440
271+
for symlink in symlinks:
272272
result.add_dataset_symlink(symlink)
273273

274274
for dataset in event.inputs + event.outputs:

data_rentgen/consumer/extractors/column_lineage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def extract_column_lineage(operation: OperationDTO, target_dataset: OpenLineageD
6767

6868
column_lineage_key = (source_dataset_dto.unique_key, target_dataset_dto.unique_key)
6969
for transformation in input_field.transformations:
70-
# OL integration for Spark before v1.23 (or with columnLineage.datasetLineageEnabled=false, which is still default)
70+
# OL integration for Spark before v1.23 (or with columnLineage.datasetLineageEnabled=false, which is still default) # noqa: E501
7171
# produced INDIRECT lineage for each combination source_column x target_column,
7272
# which is almost a cartesian join. It is VERY expensive to handle, so just ignore it.
7373
# See https://github.com/OpenLineage/OpenLineage/pull/3097

data_rentgen/consumer/extractors/dataset.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@
3232
def connect_dataset_with_symlinks(
3333
dataset: DatasetDTO,
3434
symlink: DatasetDTO,
35-
type: OpenLineageSymlinkType,
35+
type_: OpenLineageSymlinkType,
3636
) -> list[DatasetSymlinkDTO]:
3737
result = []
38-
is_metastore_symlink = type == OpenLineageSymlinkType.TABLE
38+
is_metastore_symlink = type_ == OpenLineageSymlinkType.TABLE
3939

4040
result.append(
4141
DatasetSymlinkDTO(
@@ -76,13 +76,14 @@ def extract_dataset_and_symlinks(dataset: OpenLineageDataset) -> tuple[DatasetDT
7676
if table_symlinks:
7777
# We are swapping the dataset with its TABLE symlink to create a cleaner lineage.
7878
# For example, by replacing an HDFS file with its corresponding Hive table.
79-
# This ensures that all operations interact with a single table instead of multiple files (which may represent different partitions).
79+
# This ensures that all operations interact with a single table instead of multiple files (which may represent different partitions). # noqa: E501
8080
# Discussion on this issue: https://github.com/OpenLineage/OpenLineage/issues/2718
8181

8282
# TODO: add support for multiple TABLE symlinks
8383
if len(table_symlinks) > 1:
8484
logger.warning(
85-
"Dataset has more than one TABLE symlink. Only the first one will be used for replacement. Symlink name: %s",
85+
"Dataset has more than one TABLE symlink. "
86+
"Only the first one will be used for replacement. Symlink name: %s",
8687
table_symlinks[0].name,
8788
)
8889
table_dataset_dto = extract_dataset(table_symlinks[0])

data_rentgen/consumer/extractors/output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def extract_output(
1313
dataset: OpenLineageOutputDataset,
1414
) -> tuple[OutputDTO, list[DatasetSymlinkDTO]]:
1515
lifecycle_change = dataset.facets.lifecycleStateChange
16-
if lifecycle_change:
16+
if lifecycle_change: # noqa: SIM108
1717
output_type = OutputTypeDTO(lifecycle_change.lifecycleStateChange)
1818
else:
1919
output_type = OutputTypeDTO.APPEND

data_rentgen/consumer/extractors/run.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def enrich_run_identifiers(run: RunDTO, event: OpenLineageRunEvent) -> RunDTO:
8484
return run
8585

8686

87-
def enrich_run_logs(run: RunDTO, event: OpenLineageRunEvent) -> RunDTO: # noqa: WPS231
87+
def enrich_run_logs(run: RunDTO, event: OpenLineageRunEvent) -> RunDTO: # noqa: C901, PLR0912
8888
spark_application_details = event.run.facets.spark_applicationDetails
8989
if spark_application_details:
9090
if spark_application_details.proxyUrl:
@@ -142,7 +142,7 @@ def enrich_run_logs(run: RunDTO, event: OpenLineageRunEvent) -> RunDTO: # noqa:
142142
return run
143143

144144

145-
def get_airflow_2_3_plus_dag_run_url( # noqa: WPS114
145+
def get_airflow_2_3_plus_dag_run_url(
146146
namespace: str,
147147
airflow_dag_run_facet: OpenLineageAirflowDagRunFacet,
148148
) -> str:
@@ -153,7 +153,7 @@ def get_airflow_2_3_plus_dag_run_url( # noqa: WPS114
153153
return f"{namespace}/dags/{dag_id}/grid?dag_run_id={dag_run_id}"
154154

155155

156-
def get_airflow_2_x_dag_run_url( # noqa: WPS114
156+
def get_airflow_2_x_dag_run_url(
157157
namespace: str,
158158
airflow_dag_run_facet: OpenLineageAirflowDagRunFacet,
159159
) -> str:
@@ -163,7 +163,7 @@ def get_airflow_2_x_dag_run_url( # noqa: WPS114
163163
return f"{namespace}/graph?dag_id={dag_id}&execution_date={execution_date}"
164164

165165

166-
def get_airflow_2_9_plus_task_log_url( # noqa: WPS114
166+
def get_airflow_2_9_plus_task_log_url(
167167
namespace: str,
168168
airflow_task_run_facet: OpenLineageAirflowTaskRunFacet,
169169
) -> str:
@@ -178,7 +178,7 @@ def get_airflow_2_9_plus_task_log_url( # noqa: WPS114
178178
return f"{namespace}/dags/{dag_id}/grid?tab=logs&dag_run_id={dag_run_id}&task_id={task_id}&map_index={map_index}"
179179

180180

181-
def get_airflow_2_x_task_log_url( # noqa: WPS114
181+
def get_airflow_2_x_task_log_url(
182182
namespace: str,
183183
airflow_task_run_facet: OpenLineageAirflowTaskRunFacet,
184184
) -> str:

data_rentgen/consumer/openlineage/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
So we have to write our own deserialization logic.
77
88
Also, FastStream supports only ``pydantic`` models, whereas openlineage-python provides ``attrs`` models.
9-
"""
9+
""" # noqa: E501

data_rentgen/consumer/openlineage/dataset_facets/__init__.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,27 +44,27 @@
4444
)
4545

4646
__all__ = [
47+
"OpenLineageColumnLineageDatasetFacet",
48+
"OpenLineageColumnLineageDatasetFacetField",
49+
"OpenLineageColumnLineageDatasetFacetFieldRef",
50+
"OpenLineageColumnLineageDatasetFacetFieldTransformation",
4751
"OpenLineageDatasetFacet",
52+
"OpenLineageDatasetFacets",
53+
"OpenLineageDatasetLifecycleStateChange",
54+
"OpenLineageDatasetPreviousIdentifier",
4855
"OpenLineageDatasourceDatasetFacet",
4956
"OpenLineageDocumentationDatasetFacet",
50-
"OpenLineageLifecycleStateChangeDatasetFacet",
51-
"OpenLineageDatasetPreviousIdentifier",
52-
"OpenLineageDatasetLifecycleStateChange",
57+
"OpenLineageInputDatasetFacets",
5358
"OpenLineageInputStatisticsInputDatasetFacet",
59+
"OpenLineageLifecycleStateChangeDatasetFacet",
60+
"OpenLineageOutputDatasetFacets",
5461
"OpenLineageOutputStatisticsOutputDatasetFacet",
5562
"OpenLineageSchemaDatasetFacet",
5663
"OpenLineageSchemaField",
5764
"OpenLineageStorageDatasetFacet",
58-
"OpenLineageSymlinksDatasetFacet",
59-
"OpenLineageSymlinkType",
6065
"OpenLineageSymlinkIdentifier",
61-
"OpenLineageDatasetFacets",
62-
"OpenLineageInputDatasetFacets",
63-
"OpenLineageOutputDatasetFacets",
64-
"OpenLineageColumnLineageDatasetFacet",
65-
"OpenLineageColumnLineageDatasetFacetField",
66-
"OpenLineageColumnLineageDatasetFacetFieldRef",
67-
"OpenLineageColumnLineageDatasetFacetFieldTransformation",
66+
"OpenLineageSymlinkType",
67+
"OpenLineageSymlinksDatasetFacet",
6868
]
6969

7070

0 commit comments

Comments
 (0)