Commit 1568b68

make release-tag: Merge branch 'main' into stable

2 parents: 9d75e23 + c829829

File tree: 34 files changed (+683 / -40 lines)

.github/workflows/dependency_checker.yml (1 addition & 0 deletions)

@@ -16,6 +16,7 @@ jobs:
         run: |
           python -m pip install .[dev]
           make check-deps OUTPUT_FILEPATH=latest_requirements.txt
+          make fix-lint
       - name: Create pull request
         id: cpr
         uses: peter-evans/create-pull-request@v4

HISTORY.md (16 additions & 0 deletions)

@@ -1,5 +1,21 @@
 # History
 
+## v0.17.0 - 2024-11-14
+
+This release adds a number of Multi-Sequence Aggregate Similarity (MSAS) metrics!
+
+### Bugs Fixed
+
+* Relocate timeseries metrics modules - Issue [#661](https://github.com/sdv-dev/SDMetrics/issues/661) by @fealho
+* Fix `SequenceLengthSimilarity` docstrings - Issue [#660](https://github.com/sdv-dev/SDMetrics/issues/660) by @fealho
+* When running Quality Report, ContingencySimilarity produces a RuntimeWarning (`The values in the array are unorderable.`) - Issue [#656](https://github.com/sdv-dev/SDMetrics/issues/656) by @R-Palazzo
+
+### New Features
+
+* Add metric for inter-row MSAS - Issue [#640](https://github.com/sdv-dev/SDMetrics/issues/640) by @fealho
+* Add metric for general MSAS statistics - Issue [#639](https://github.com/sdv-dev/SDMetrics/issues/639) by @fealho
+* Add metric for sequence length similarity - Issue [#638](https://github.com/sdv-dev/SDMetrics/issues/638) by @fealho
+
 ## v0.16.0 - 2024-09-25
 
 This release improves the performance of the `contingency_similarity` metric. It also factors dtypes into the score of the `TableStructure` metric.
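For readers unfamiliar with the new metrics, the MSAS metrics announced above are exposed as column-pair metrics. The sketch below is illustrative only: it assumes the usual SDMetrics `compute(real_data, synthetic_data)` classmethod interface, with each argument passed as a `(sequence_key, values)` pair of pandas Series; the toy data and keyword names are assumptions, not taken from this commit.

```python
import pandas as pd

from sdmetrics.column_pairs import InterRowMSAS

# Toy sequential data: 'id1' and 'id2' are sequence keys, the second Series is
# the value tracked across each sequence.
real_keys = pd.Series(['id1', 'id1', 'id1', 'id2', 'id2', 'id2'])
real_values = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
synthetic_keys = pd.Series(['id1', 'id1', 'id1', 'id2', 'id2', 'id2'])
synthetic_values = pd.Series([1.0, 2.4, 3.1, 4.0, 5.3, 6.2])

# Assumed call pattern: a score near 1 means synthetic inter-row changes
# resemble the real ones, a score near 0 means they do not.
score = InterRowMSAS.compute(
    real_data=(real_keys, real_values),
    synthetic_data=(synthetic_keys, synthetic_values),
)
print(score)
```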

conda/meta.yaml (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-{% set version = '0.16.0' %}
+{% set version = '0.17.0.dev1' %}
 
 package:
   name: "{{ name|lower }}"

latest_requirements.txt (1 addition & 1 deletion)

@@ -4,4 +4,4 @@ pandas==2.2.3
 plotly==5.24.1
 scikit-learn==1.5.2
 scipy==1.13.1
-tqdm==4.66.5
+tqdm==4.67.0

pyproject.toml (6 additions & 5 deletions)

@@ -134,7 +134,7 @@ convention = 'google'
 add-ignore = ['D107', 'D407', 'D417']
 
 [tool.bumpversion]
-current_version = "0.16.0"
+current_version = "0.17.0.dev1"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',
@@ -186,7 +186,7 @@ exclude = [
     ".git",
     "__pycache__",
     ".ipynb_checkpoints",
-    ".ipynb",
+    "*.ipynb",
     "tasks.py",
 ]
 
@@ -204,10 +204,11 @@ select = [
     # print statements
     "T201",
     # pandas-vet
-    "PD"
+    "PD",
+    # numpy 2.0
+    "NPY201"
 ]
 ignore = [
-    "E501",
     # pydocstyle
     "D107", # Missing docstring in __init__
     "D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
@@ -229,7 +230,7 @@ lines-between-types = 0
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
 "errors.py" = ["D105"]
-"tests/**.py" = ["D", "W505"]
+"tests/**.py" = ["D"]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
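For context on the newly selected `NPY201` rule (commented "numpy 2.0" in the diff above): it is Ruff's NumPy 2.0 rule, which flags usages that NumPy 2.0 removed or deprecated. A small, hypothetical example of the kind of call it would flag, not code from this repository:

```python
import numpy as np

real_ids = np.array([1, 2, 3, 4])
synthetic_ids = np.array([2, 4, 6])

# np.in1d is deprecated as of NumPy 2.0 and is the sort of usage NPY201 flags.
# overlap = np.in1d(synthetic_ids, real_ids)

# NumPy-2.0-friendly replacement.
overlap = np.isin(synthetic_ids, real_ids)
print(overlap)
```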

sdmetrics/__init__.py (1 addition & 1 deletion)

@@ -4,7 +4,7 @@
 
 __author__ = 'MIT Data To AI Lab'
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.16.0'
+__version__ = '0.17.0.dev1'
 
 import sys
 import warnings as python_warnings

sdmetrics/base.py (1 addition & 2 deletions)

@@ -99,8 +99,7 @@ def normalize(cls, raw_score):
 
         if score is None or score < 0 or score > 1:
             raise AssertionError(
-                f'This should be unreachable. The score {score} should be'
-                f'a value between 0 and 1.'
+                f'This should be unreachable. The score {score} should bea value between 0 and 1.'
             )
 
         if cls.goal == Goal.MINIMIZE:

sdmetrics/column_pairs/__init__.py (4 additions & 0 deletions)

@@ -11,6 +11,8 @@
     DiscreteKLDivergence,
 )
 from sdmetrics.column_pairs.statistical.referential_integrity import ReferentialIntegrity
+from sdmetrics.column_pairs.statistical.inter_row_msas import InterRowMSAS
+from sdmetrics.column_pairs.statistical.statistic_msas import StatisticMSAS
 
 __all__ = [
     'CardinalityBoundaryAdherence',
@@ -20,4 +22,6 @@
     'CorrelationSimilarity',
     'DiscreteKLDivergence',
     'ReferentialIntegrity',
+    'InterRowMSAS',
+    'StatisticMSAS',
 ]

sdmetrics/column_pairs/statistical/__init__.py (4 additions & 0 deletions)

@@ -10,6 +10,8 @@
     DiscreteKLDivergence,
 )
 from sdmetrics.column_pairs.statistical.referential_integrity import ReferentialIntegrity
+from sdmetrics.column_pairs.statistical.inter_row_msas import InterRowMSAS
+from sdmetrics.column_pairs.statistical.statistic_msas import StatisticMSAS
 
 __all__ = [
     'CardinalityBoundaryAdherence',
@@ -18,4 +20,6 @@
     'CorrelationSimilarity',
     'DiscreteKLDivergence',
     'ReferentialIntegrity',
+    'InterRowMSAS',
+    'StatisticMSAS',
 ]
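With these re-exports, both new metrics should be importable from `sdmetrics.column_pairs` as well as `sdmetrics.column_pairs.statistical`. Below is a minimal sketch for `StatisticMSAS`, assuming the same `(sequence_key, values)` tuple interface as above and a `statistic` keyword such as `'mean'`; both the signature and the data are assumptions, not confirmed by this diff.

```python
import pandas as pd

# Either import path should work once the __init__ modules above are updated.
from sdmetrics.column_pairs import StatisticMSAS

keys = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
real_values = pd.Series([10.0, 12.0, 11.0, 9.0, 8.0, 10.0])
synthetic_values = pd.Series([10.5, 11.5, 11.0, 9.5, 8.5, 9.5])

# Assumed call: compare a per-sequence summary statistic (here the mean)
# between the real and synthetic sequences.
score = StatisticMSAS.compute(
    real_data=(keys, real_values),
    synthetic_data=(keys, synthetic_values),
    statistic='mean',
)
print(score)
```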

sdmetrics/column_pairs/statistical/contingency_similarity.py (1 addition & 1 deletion)

@@ -44,7 +44,7 @@ def compute(cls, real_data, synthetic_data):
         contingency_synthetic = synthetic.groupby(list(columns), dropna=False).size() / len(
             synthetic
         )
-        combined_index = contingency_real.index.union(contingency_synthetic.index)
+        combined_index = contingency_real.index.union(contingency_synthetic.index, sort=False)
         contingency_synthetic = contingency_synthetic.reindex(combined_index, fill_value=0)
         contingency_real = contingency_real.reindex(combined_index, fill_value=0)
         diff = abs(contingency_real - contingency_synthetic).fillna(0)
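The `sort=False` argument is the fix for issue #656 listed in HISTORY.md above: when the union of category combinations mixes labels pandas cannot order, the default sorting union emits the RuntimeWarning quoted in the changelog. A standalone illustration of the pandas behavior with made-up indexes (not the metric's actual data):

```python
import pandas as pd

# Object-dtype indexes whose labels cannot all be compared to each other.
real_index = pd.Index(['low', 1, 'high'])
synthetic_index = pd.Index([2, 'low'])

# The default union tries to sort the combined labels, which warns for
# unorderable mixed types; sort=False skips the sort and stays silent.
combined = real_index.union(synthetic_index, sort=False)

print(list(combined))
```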
