Commit 1568b68

make release-tag: Merge branch 'main' into stable

2 parents: 9d75e23 + c829829

File tree: 34 files changed (+683 / -40 lines)

.github/workflows/dependency_checker.yml (1 addition & 0 deletions)

@@ -16,6 +16,7 @@ jobs:
         run: |
           python -m pip install .[dev]
           make check-deps OUTPUT_FILEPATH=latest_requirements.txt
+          make fix-lint
       - name: Create pull request
         id: cpr
         uses: peter-evans/create-pull-request@v4

HISTORY.md (16 additions & 0 deletions)

@@ -1,5 +1,21 @@
 # History
 
+## v0.17.0 - 2024-11-14
+
+This release adds a number of Multi-Sequence Aggregate Similarity (MSAS) metrics!
+
+### Bugs Fixed
+
+* Relocate timeseries metrics modules - Issue [#661](https://github.com/sdv-dev/SDMetrics/issues/661) by @fealho
+* Fix `SequenceLengthSimilarity` docstrings - Issue [#660](https://github.com/sdv-dev/SDMetrics/issues/660) by @fealho
+* When running Quality Report, ContingencySimilarity produces a RuntimeWarning (`The values in the array are unorderable.`) - Issue [#656](https://github.com/sdv-dev/SDMetrics/issues/656) by @R-Palazzo
+
+### New Features
+
+* Add metric for inter-row MSAS - Issue [#640](https://github.com/sdv-dev/SDMetrics/issues/640) by @fealho
+* Add metric for general MSAS statistics - Issue [#639](https://github.com/sdv-dev/SDMetrics/issues/639) by @fealho
+* Add metric for sequence length similarity - Issue [#638](https://github.com/sdv-dev/SDMetrics/issues/638) by @fealho
+
 ## v0.16.0 - 2024-09-25
 
 This release improves the performance of the `contingency_similarity` metric. It also factors dtypes into the score of the `TableStructure` metric.
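For readers unfamiliar with the new metrics, the MSAS metrics announced above are exposed as column-pair metrics. The sketch below is illustrative only: it assumes the usual SDMetrics `compute(real_data, synthetic_data)` classmethod interface, with each argument passed as a `(sequence_key, values)` pair of pandas Series; the toy data and keyword names are assumptions, not taken from this commit.

```python
import pandas as pd

from sdmetrics.column_pairs import InterRowMSAS

# Toy sequential data: 'id1' and 'id2' are sequence keys, the second Series is
# the value tracked across each sequence.
real_keys = pd.Series(['id1', 'id1', 'id1', 'id2', 'id2', 'id2'])
real_values = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
synthetic_keys = pd.Series(['id1', 'id1', 'id1', 'id2', 'id2', 'id2'])
synthetic_values = pd.Series([1.0, 2.4, 3.1, 4.0, 5.3, 6.2])

# Assumed call pattern: a score near 1 means synthetic inter-row changes
# resemble the real ones, a score near 0 means they do not.
score = InterRowMSAS.compute(
    real_data=(real_keys, real_values),
    synthetic_data=(synthetic_keys, synthetic_values),
)
print(score)
```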

conda/meta.yaml (1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-{% set version = '0.16.0' %}
+{% set version = '0.17.0.dev1' %}
 
 package:
   name: "{{ name|lower }}"

latest_requirements.txt (1 addition & 1 deletion)

@@ -4,4 +4,4 @@ pandas==2.2.3
 plotly==5.24.1
 scikit-learn==1.5.2
 scipy==1.13.1
-tqdm==4.66.5
+tqdm==4.67.0

pyproject.toml (6 additions & 5 deletions)

@@ -134,7 +134,7 @@ convention = 'google'
 add-ignore = ['D107', 'D407', 'D417']
 
 [tool.bumpversion]
-current_version = "0.16.0"
+current_version = "0.17.0.dev1"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',
@@ -186,7 +186,7 @@ exclude = [
     ".git",
     "__pycache__",
     ".ipynb_checkpoints",
-    ".ipynb",
+    "*.ipynb",
     "tasks.py",
 ]
 
@@ -204,10 +204,11 @@ select = [
     # print statements
     "T201",
     # pandas-vet
-    "PD"
+    "PD",
+    # numpy 2.0
+    "NPY201"
 ]
 ignore = [
-    "E501",
     # pydocstyle
     "D107", # Missing docstring in __init__
     "D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
@@ -229,7 +230,7 @@ lines-between-types = 0
 [tool.ruff.lint.per-file-ignores]
 "__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
 "errors.py" = ["D105"]
-"tests/**.py" = ["D", "W505"]
+"tests/**.py" = ["D"]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
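For context on the newly selected `NPY201` rule (commented "numpy 2.0" in the diff above): it is Ruff's NumPy 2.0 rule, which flags usages that NumPy 2.0 removed or deprecated. A small, hypothetical example of the kind of call it would flag, not code from this repository:

```python
import numpy as np

real_ids = np.array([1, 2, 3, 4])
synthetic_ids = np.array([2, 4, 6])

# np.in1d is deprecated as of NumPy 2.0 and is the sort of usage NPY201 flags.
# overlap = np.in1d(synthetic_ids, real_ids)

# NumPy-2.0-friendly replacement.
overlap = np.isin(synthetic_ids, real_ids)
print(overlap)
```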

sdmetrics/__init__.py (1 addition & 1 deletion)

@@ -4,7 +4,7 @@
 
 __author__ = 'MIT Data To AI Lab'
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.16.0'
+__version__ = '0.17.0.dev1'
 
 import sys
 import warnings as python_warnings

sdmetrics/base.py (1 addition & 2 deletions)

@@ -99,8 +99,7 @@ def normalize(cls, raw_score):
 
         if score is None or score < 0 or score > 1:
             raise AssertionError(
-                f'This should be unreachable. The score {score} should be'
-                f'a value between 0 and 1.'
+                f'This should be unreachable. The score {score} should bea value between 0 and 1.'
             )
 
         if cls.goal == Goal.MINIMIZE:

sdmetrics/column_pairs/__init__.py (4 additions & 0 deletions)

@@ -11,6 +11,8 @@
     DiscreteKLDivergence,
 )
 from sdmetrics.column_pairs.statistical.referential_integrity import ReferentialIntegrity
+from sdmetrics.column_pairs.statistical.inter_row_msas import InterRowMSAS
+from sdmetrics.column_pairs.statistical.statistic_msas import StatisticMSAS
 
 __all__ = [
     'CardinalityBoundaryAdherence',
@@ -20,4 +22,6 @@
     'CorrelationSimilarity',
     'DiscreteKLDivergence',
     'ReferentialIntegrity',
+    'InterRowMSAS',
+    'StatisticMSAS',
 ]

sdmetrics/column_pairs/statistical/__init__.py (4 additions & 0 deletions)

@@ -10,6 +10,8 @@
     DiscreteKLDivergence,
 )
 from sdmetrics.column_pairs.statistical.referential_integrity import ReferentialIntegrity
+from sdmetrics.column_pairs.statistical.inter_row_msas import InterRowMSAS
+from sdmetrics.column_pairs.statistical.statistic_msas import StatisticMSAS
 
 __all__ = [
     'CardinalityBoundaryAdherence',
@@ -18,4 +20,6 @@
     'CorrelationSimilarity',
     'DiscreteKLDivergence',
     'ReferentialIntegrity',
+    'InterRowMSAS',
+    'StatisticMSAS',
 ]
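With these re-exports, both new metrics should be importable from `sdmetrics.column_pairs` as well as `sdmetrics.column_pairs.statistical`. Below is a minimal sketch for `StatisticMSAS`, assuming the same `(sequence_key, values)` tuple interface as above and a `statistic` keyword such as `'mean'`; both the signature and the data are assumptions, not confirmed by this diff.

```python
import pandas as pd

# Either import path should work once the __init__ modules above are updated.
from sdmetrics.column_pairs import StatisticMSAS

keys = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'])
real_values = pd.Series([10.0, 12.0, 11.0, 9.0, 8.0, 10.0])
synthetic_values = pd.Series([10.5, 11.5, 11.0, 9.5, 8.5, 9.5])

# Assumed call: compare a per-sequence summary statistic (here the mean)
# between the real and synthetic sequences.
score = StatisticMSAS.compute(
    real_data=(keys, real_values),
    synthetic_data=(keys, synthetic_values),
    statistic='mean',
)
print(score)
```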

sdmetrics/column_pairs/statistical/contingency_similarity.py (1 addition & 1 deletion)

@@ -44,7 +44,7 @@ def compute(cls, real_data, synthetic_data):
         contingency_synthetic = synthetic.groupby(list(columns), dropna=False).size() / len(
             synthetic
         )
-        combined_index = contingency_real.index.union(contingency_synthetic.index)
+        combined_index = contingency_real.index.union(contingency_synthetic.index, sort=False)
         contingency_synthetic = contingency_synthetic.reindex(combined_index, fill_value=0)
         contingency_real = contingency_real.reindex(combined_index, fill_value=0)
         diff = abs(contingency_real - contingency_synthetic).fillna(0)
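The `sort=False` argument is the fix for issue #656 listed in HISTORY.md above: when the union of category combinations mixes labels pandas cannot order, the default sorting union emits the RuntimeWarning quoted in the changelog. A standalone illustration of the pandas behavior with made-up indexes (not the metric's actual data):

```python
import pandas as pd

# Object-dtype indexes whose labels cannot all be compared to each other.
real_index = pd.Index(['low', 1, 'high'])
synthetic_index = pd.Index([2, 'low'])

# The default union tries to sort the combined labels, which warns for
# unorderable mixed types; sort=False skips the sort and stays silent.
combined = real_index.union(synthetic_index, sort=False)

print(list(combined))
```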
