Skip to content

Commit f3ce2a9

Browse files
cerwai and liuxiaotong authored
fix: resolve all ruff lint errors (#4)
Co-authored-by: liuxiaotong <liuxiaotong@knowlyr.com>
1 parent b375943 commit f3ce2a9

File tree

9 files changed

15 additions and 25 deletions

9 files changed

15 additions and 25 deletions

src/datarecipe/cli/tools.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1244,7 +1244,7 @@ def pii(dataset_id: str, sample_size: int, pii_types: tuple, as_json: bool, outp
12441244
risk_colors = {"high": "red", "medium": "yellow", "low": "green", "none": "green"}
12451245
color = risk_colors.get(report.risk_level, "white")
12461246

1247-
console.print(f"\n[bold]PII Detection Report[/bold]")
1247+
console.print("\n[bold]PII Detection Report[/bold]")
12481248
console.print(f" Samples scanned: {report.total_samples}")
12491249
console.print(f" Samples with PII: {report.samples_with_pii}")
12501250
console.print(f" PII ratio: {report.pii_ratio * 100:.1f}%")
@@ -1270,7 +1270,7 @@ def pii(dataset_id: str, sample_size: int, pii_types: tuple, as_json: bool, outp
12701270
console.print(table)
12711271

12721272
if report.recommendations:
1273-
console.print(f"\n[bold cyan]Recommendations:[/bold cyan]")
1273+
console.print("\n[bold cyan]Recommendations:[/bold cyan]")
12741274
for rec in report.recommendations:
12751275
console.print(f" - {rec}")
12761276

@@ -1328,12 +1328,12 @@ def ira(
13281328

13291329
with console.status(f"[cyan]Analyzing agreement in {dataset_id}...[/cyan]"):
13301330
try:
1331-
kwargs = dict(
1332-
item_field=item_field,
1333-
annotator_field=annotator_field,
1334-
label_field=label_field,
1335-
data_format=data_format,
1336-
)
1331+
kwargs = {
1332+
"item_field": item_field,
1333+
"annotator_field": annotator_field,
1334+
"label_field": label_field,
1335+
"data_format": data_format,
1336+
}
13371337
if is_local:
13381338
report = analyzer.analyze_from_file(
13391339
str(local_path.resolve()), sample_size=sample_size, **kwargs,
@@ -1372,7 +1372,7 @@ def ira(
13721372
}
13731373
color = quality_colors.get(report.quality_level, "white")
13741374

1375-
console.print(f"\n[bold]Inter-Rater Agreement Report[/bold]")
1375+
console.print("\n[bold]Inter-Rater Agreement Report[/bold]")
13761376
console.print(f" Items analyzed: {report.total_items}")
13771377
console.print(f" Total annotations: {report.total_annotations}")
13781378
console.print(f" Annotators: {report.n_annotators}")
@@ -1414,7 +1414,7 @@ def ira(
14141414
console.print(table)
14151415

14161416
if report.recommendations:
1417-
console.print(f"\n[bold cyan]Recommendations:[/bold cyan]")
1417+
console.print("\n[bold cyan]Recommendations:[/bold cyan]")
14181418
for rec in report.recommendations:
14191419
console.print(f" - {rec}")
14201420

src/datarecipe/comparator.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from datarecipe.quality_metrics import QualityAnalyzer, QualityReport
99
from datarecipe.schema import GenerationType, Recipe, SourceType
1010

11-
1211
# ==================== Similarity dataclasses ====================
1312

1413

@@ -599,7 +598,7 @@ def _quality_similarity(self, a: DatasetMetrics, b: DatasetMetrics) -> float:
599598
b.quality.complexity.vocabulary_richness,
600599
b.quality.overall_score / 100.0,
601600
]
602-
dist = math.sqrt(sum((x - y) ** 2 for x, y in zip(dims_a, dims_b)))
601+
dist = math.sqrt(sum((x - y) ** 2 for x, y in zip(dims_a, dims_b, strict=False)))
603602
max_dist = math.sqrt(len(dims_a))
604603
return max(0.0, 1.0 - dist / max_dist)
605604

src/datarecipe/ira_analyzer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from dataclasses import dataclass, field
55
from itertools import combinations
66

7-
87
# ==================== Dataclasses ====================
98

109

@@ -389,7 +388,7 @@ def _cohen_kappa(self, labels_a: list[str], labels_b: list[str]) -> float:
389388
all_labels = sorted(set(labels_a) | set(labels_b))
390389

391390
# Observed agreement
392-
po = sum(1 for a, b in zip(labels_a, labels_b) if a == b) / n
391+
po = sum(1 for a, b in zip(labels_a, labels_b, strict=False) if a == b) / n
393392

394393
# Expected agreement
395394
pe = 0.0
@@ -538,7 +537,7 @@ def _compute_pairwise(
538537

539538
kappa = self._cohen_kappa(labels_a, labels_b)
540539
n = len(common_items)
541-
agree = sum(1 for a, b in zip(labels_a, labels_b) if a == b)
540+
agree = sum(1 for a, b in zip(labels_a, labels_b, strict=False) if a == b)
542541
pct = agree / n if n > 0 else 0.0
543542

544543
cm = self._build_confusion_matrix(labels_a, labels_b)
@@ -558,7 +557,7 @@ def _build_confusion_matrix(
558557
) -> dict[tuple[str, str], int]:
559558
"""Build confusion matrix as {(label_a, label_b): count}."""
560559
cm: dict[tuple[str, str], int] = {}
561-
for a, b in zip(labels_a, labels_b):
560+
for a, b in zip(labels_a, labels_b, strict=False):
562561
key = (a, b)
563562
cm[key] = cm.get(key, 0) + 1
564563
return cm

src/datarecipe/mcp_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -973,7 +973,7 @@ async def _recipe_diff(arguments: dict[str, Any]) -> list[TextContent]:
973973
if "all" in sections:
974974
sections = ["schema", "stats", "rubrics", "cost"]
975975

976-
lines = [f"## 分析对比", "", f"- A: `{os.path.basename(dir_a)}`", f"- B: `{os.path.basename(dir_b)}`", ""]
976+
lines = ["## 分析对比", "", f"- A: `{os.path.basename(dir_a)}`", f"- B: `{os.path.basename(dir_b)}`", ""]
977977

978978
def _load_json(base_dir: str, *paths: str) -> dict | None:
979979
for p in paths:

src/datarecipe/pii_detector.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import re
44
from dataclasses import dataclass, field
55

6-
76
# ==================== Dataclasses ====================
87

98

src/datarecipe/sources/local.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Extract recipe information from local data files (CSV, Parquet, JSONL)."""
22

3-
import os
43
from pathlib import Path
54

65
from datarecipe.schema import (

tests/test_ira_analyzer.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import os
66
import tempfile
77
import unittest
8-
from pathlib import Path
98

109
from click.testing import CliRunner
1110

@@ -18,7 +17,6 @@
1817
PairwiseAgreement,
1918
)
2019

21-
2220
# ==================== Test data factories ====================
2321

2422

tests/test_local_source.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import tempfile
77
import unittest
88
from pathlib import Path
9-
from unittest.mock import MagicMock, patch
109

1110
from click.testing import CliRunner
1211

tests/test_pii_detector.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import os
66
import tempfile
77
import unittest
8-
from pathlib import Path
9-
from unittest.mock import patch
108

119
from click.testing import CliRunner
1210

@@ -19,7 +17,6 @@
1917
_luhn_check,
2018
)
2119

22-
2320
# ==================== Test data factories ====================
2421

2522

0 commit comments

Comments
 (0)