@@ -99,16 +99,22 @@ def test_pca_kernel_density(self):
9999 OUT_DIR + 'pca-coordinates-kde.tsv' , kde = True )
100100 coord = pd .read_table (OUT_DIR + 'pca-coordinates-kde.tsv' )
101101 expected = pd .read_table (EXPECT_DIR + 'expected-pca-coordinates-kde.tsv' )
102+ expected_negated = pd .read_table (EXPECT_DIR + 'expected-pca-coordinates-kde-negated.tsv' )
102103 coord_kde_peak = coord .loc [coord ['datapoint_labels' ] == 'kde_peak' ].round (5 )
103104 expected_kde_peak = expected .loc [expected ['datapoint_labels' ] == 'kde_peak' ].round (5 )
105+ expected_kde_peak_negated = expected_negated .loc [expected_negated ['datapoint_labels' ] == 'kde_peak' ].round (5 )
104106
105- assert coord_kde_peak .equals (expected_kde_peak )
107+ assert coord_kde_peak .equals (expected_kde_peak ) or coord_kde_peak . equals ( expected_kde_peak_negated )
106108
107109 def test_pca_robustness (self ):
108110 dataframe = ml .summarize_networks ([INPUT_DIR + 'test-data-s1/s1.txt' , INPUT_DIR + 'test-data-s2/s2.txt' ,
109111 INPUT_DIR + 'test-data-s3/s3.txt' ])
110- expected = pd .read_table (EXPECT_DIR + 'expected-pca-coordinates.tsv' )
112+ # The sign of the PCA coordinates now depends on the input data, so we need two expected coordinate files that differ only in sign.
113+ # See https://scikit-learn.org/stable/whats_new/v1.5.html#changed-models for more info.
114+ expected = pd .read_table (EXPECT_DIR + 'expected-pca-coordinates-sorted.tsv' )
115+ expected_other = pd .read_table (EXPECT_DIR + 'expected-pca-coordinates-sorted-negated.tsv' )
111116 expected = expected .round (5 )
117+ expected_other = expected_other .round (5 )
112118 expected .sort_values (by = 'datapoint_labels' , ignore_index = True , inplace = True )
113119
114120 for _ in range (5 ):
@@ -118,7 +124,7 @@ def test_pca_robustness(self):
118124 coord = pd .read_table (OUT_DIR + 'pca-shuffled-columns-coordinates.tsv' )
119125 coord = coord .round (5 ) # round values to 5 digits to account for numeric differences across machines
120126 coord .sort_values (by = 'datapoint_labels' , ignore_index = True , inplace = True )
121- assert coord .equals (expected )
127+ assert coord .equals (expected ) or coord . equals ( expected_other )
122128
123129 for _ in range (5 ):
124130 dataframe_shuffled = dataframe .sample (frac = 1 , axis = 0 ) # permute the rows
@@ -128,7 +134,7 @@ def test_pca_robustness(self):
128134 coord = coord .round (5 ) # round values to 5 digits to account for numeric differences across machines
129135 coord .sort_values (by = 'datapoint_labels' , ignore_index = True , inplace = True )
130136
131- assert coord .equals (expected )
137+ assert coord .equals (expected ) or coord . equals ( expected_other )
132138
133139 def test_hac_horizontal (self ):
134140 dataframe = ml .summarize_networks ([INPUT_DIR + 'test-data-s1/s1.txt' , INPUT_DIR + 'test-data-s2/s2.txt' , INPUT_DIR + 'test-data-s3/s3.txt' ])
0 commit comments