Skip to content

Commit ec89783

Browse files
authored
Merge pull request #229 from ipums/make-v4.2.2
Bump the version to 4.2.2
2 parents 8237724 + adaeef8 commit ec89783

File tree

11 files changed

+15
-28
lines changed

11 files changed

+15
-28
lines changed

examples/tutorial/tutorial.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,17 +9,15 @@
99

1010

1111
def parse_args() -> argparse.Namespace:
12-
parser = argparse.ArgumentParser(
13-
description="""
12+
parser = argparse.ArgumentParser(description="""
1413
This script links two very small example datasets that live in the data
1514
subdirectory. It reads in the tutorial_config.toml configuration file
1615
and runs hlink's preprocessing and matching steps to find some potential
1716
matches between the two datasets.
1817
1918
For a detailed walkthrough of the tutorial, please see the README.md
2019
file in the same directory as this script.
21-
"""
22-
)
20+
""")
2321

2422
parser.add_argument(
2523
"--clean", action="store_true", help="drop existing Spark tables on startup"

hlink/linking/core/column_mapping.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,6 @@ def transform_reverse(input_col: Column, transform: Mapping[str, Any], context:
7979
)
8080
from pyspark.sql.types import LongType
8181

82-
8382
ColumnMappingTransform: TypeAlias = Callable[
8483
[Column, Mapping[str, Any], Mapping[str, Any]], Column
8584
]

hlink/linking/hh_matching/link_step_block_on_households.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
from hlink.linking.link_step import LinkStep
1111
from hlink.linking.util import set_job_description
1212

13-
1413
logger = logging.getLogger(__name__)
1514

1615

hlink/linking/model_exploration/link_step_train_test_models.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -843,11 +843,11 @@ def _aggregate_per_threshold_results(
843843
mcc = [r.mcc for r in prediction_results if not math.isnan(r.mcc)]
844844
f_measure = [r.f_measure for r in prediction_results if not math.isnan(r.f_measure)]
845845

846-
(precision_mean, precision_sd) = _compute_mean_and_stdev(precision)
847-
(recall_mean, recall_sd) = _compute_mean_and_stdev(recall)
848-
(pr_auc_mean, pr_auc_sd) = _compute_mean_and_stdev(pr_auc)
849-
(mcc_mean, mcc_sd) = _compute_mean_and_stdev(mcc)
850-
(f_measure_mean, f_measure_sd) = _compute_mean_and_stdev(f_measure)
846+
precision_mean, precision_sd = _compute_mean_and_stdev(precision)
847+
recall_mean, recall_sd = _compute_mean_and_stdev(recall)
848+
pr_auc_mean, pr_auc_sd = _compute_mean_and_stdev(pr_auc)
849+
mcc_mean, mcc_sd = _compute_mean_and_stdev(mcc)
850+
f_measure_mean, f_measure_sd = _compute_mean_and_stdev(f_measure)
851851

852852
new_desc = pd.DataFrame(
853853
{
@@ -962,17 +962,15 @@ def _handle_param_grid_attribute(training_settings: dict[str, Any]) -> dict[str,
962962
def _get_model_parameters(training_settings: dict[str, Any]) -> list[dict[str, Any]]:
963963
if "param_grid" in training_settings:
964964
print(
965-
dedent(
966-
"""\
965+
dedent("""\
967966
Deprecation Warning: training.param_grid is deprecated.
968967
969968
Please use training.model_parameter_search instead by replacing
970969
971970
`param_grid = True` with `model_parameter_search = {strategy = "grid"}` or
972971
`param_grid = False` with `model_parameter_search = {strategy = "explicit"}`
973972
974-
[deprecated_in_version=4.0.0]"""
975-
),
973+
[deprecated_in_version=4.0.0]"""),
976974
file=sys.stderr,
977975
)
978976

hlink/linking/training/link_step_create_comparison_features.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,7 @@ def _create_training_features(self):
4242
dep_var = config[training_conf]["dependent_var"]
4343
if training_conf == "hh_training":
4444
hh_col = config[training_conf].get("hh_col", "serialp")
45-
tdl = self.task.spark.sql(
46-
f"""SELECT
45+
tdl = self.task.spark.sql(f"""SELECT
4746
td.{id_col}_a,
4847
td.{id_col}_b,
4948
td.{dep_var},
@@ -57,8 +56,7 @@ def _create_training_features(self):
5756
left join
5857
prepped_df_b pdfb
5958
on pdfb.{id_col} = td.{id_col}_b
60-
"""
61-
)
59+
""")
6260
else:
6361
tdl = self.task.spark.table(f"{table_prefix}training_data").select(
6462
f"{id_col}_a", f"{id_col}_b", dep_var

hlink/linking/util.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from contextlib import contextmanager
22
from math import ceil
33

4-
54
MIN_PARTITIONS = 200
65
MAX_PARTITIONS = 10000
76

hlink/tests/core/column_mapping_test.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
from hlink.linking.core.column_mapping import apply_transform, select_column_mapping
77

8-
98
TEST_DF_1 = pd.DataFrame(
109
{
1110
"id": [0, 1, 2, 3, 4, 5],

hlink/tests/core/substitutions_test.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,11 @@ def test_load_substitutions(tmp_path: Path) -> None:
2424

2525
def test_generate_substitutions(spark: SparkSession, tmp_path: Path) -> None:
2626
tmp_file = tmp_path / "substitutions.csv"
27-
tmp_file.write_text(
28-
"""rose,rosie
27+
tmp_file.write_text("""rose,rosie
2928
sophia,sophy
3029
sophia,sofia
3130
amanda,mandy
32-
jane,jean"""
33-
)
31+
jane,jean""")
3432

3533
df = spark.createDataFrame(
3634
[("agnes", 2), ("mandy", 2), ("sophy", 2), ("rosie", 2), ("jean", 1)],

hlink/tests/core/transforms_test.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
from hlink.linking.core.transforms import apply_transform, generate_transforms
66
from hlink.linking.link_task import LinkTask
77

8-
98
ignore_apply_transform_dep_warning = pytest.mark.filterwarnings(
109
r"ignore:\s*This is a deprecated alias for hlink.linking.core.column_mapping.apply_transform"
1110
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "hlink"
7-
version = "4.2.1"
7+
version = "4.2.2"
88
description = "Fast supervised pyspark record linkage software"
99
readme = "README.md"
1010
requires-python = ">=3.10"

0 commit comments

Comments
 (0)