11import numpy as np
2+ import pandas as pd
23import logging
34from collections .abc import Iterable
45from scipy .sparse import csr_matrix
@@ -13,7 +14,10 @@ def __init__(self, level):
1314 self .logger .propagate = False
1415
def info(self, message):
    """ Forward `message`, rendered as text, to `self.logger.info`.

    Arguments:
        message: Object to log; it is converted with default formatting
                 before being handed to the logger.
    """
    rendered = format(message)
    self.logger.info(rendered)
18+
def warning(self, message):
    """ Forward `message` to `self.logger.warning`, prefixed with "WARNING: ".

    Arguments:
        message: Object to log; it is converted with default formatting
                 and appended to the fixed prefix.
    """
    rendered = "WARNING: " + format(message)
    self.logger.warning(rendered)
1721
1822 def set_level (self , level ):
1923 levels = ["DEBUG" , "INFO" , "WARNING" , "ERROR" , "CRITICAL" ]
@@ -32,10 +36,11 @@ def _add_handler(self):
3236
def check_documents_type(documents):
    """ Validate that `documents` is a non-string iterable holding string entries.

    Arguments:
        documents: The collection of documents to inspect.

    Raises:
        TypeError: If `documents` is a pandas DataFrame, if it is a single
                   string or not iterable at all, or if none of its elements
                   is a string (this includes an empty iterable).
    """
    # Handled before the generic iterable check so a DataFrame gets its
    # dedicated message.
    if isinstance(documents, pd.DataFrame):
        raise TypeError("Make sure to supply a list of strings, not a dataframe.")

    # A bare string is itself iterable, so it is rejected explicitly together
    # with everything that is not iterable.
    if isinstance(documents, str) or not isinstance(documents, Iterable):
        raise TypeError("Make sure that the documents variable is an iterable containing strings only.")

    # NOTE(review): the check passes as soon as ONE element is a string, even
    # though the message says "only contains strings" -- confirm whether mixed
    # content is intentionally allowed before tightening this.
    string_flags = [isinstance(entry, str) for entry in documents]
    if True not in string_flags:
        raise TypeError("Make sure that the iterable only contains strings.")
4146
@@ -94,15 +99,16 @@ def __getattr__(self, *args, **kwargs):
def __call__(self, *args, **kwargs):
    """ Reject any call by raising `ModuleNotFoundError` carrying `self.msg`.

    Positional and keyword arguments are accepted but never used.

    Raises:
        ModuleNotFoundError: Always, with `self.msg` as its message.
    """
    reason = self.msg
    raise ModuleNotFoundError(reason)
96101
102+
97103def validate_distance_matrix (X , n_samples ):
98104 """ Validate the distance matrix and convert it to a condensed distance matrix
99105 if necessary.
100106
101- A valid distance matrix is either a square matrix of shape (n_samples, n_samples)
102- with zeros on the diagonal and non-negative values or condensed distance matrix
103- of shape (n_samples * (n_samples - 1) / 2,) containing the upper triangular of the
107+ A valid distance matrix is either a square matrix of shape (n_samples, n_samples)
108+ with zeros on the diagonal and non-negative values or condensed distance matrix
109+ of shape (n_samples * (n_samples - 1) / 2,) containing the upper triangular of the
104110 distance matrix.
105-
111+
106112 Arguments:
107113 X: Distance matrix to validate.
108114 n_samples: Number of samples in the dataset.
@@ -118,26 +124,26 @@ def validate_distance_matrix(X, n_samples):
118124 if len (s ) == 1 :
119125 # check it has correct size
120126 n = s [0 ]
121- if n != (n_samples * (n_samples - 1 ) / 2 ):
127+ if n != (n_samples * (n_samples - 1 ) / 2 ):
122128 raise ValueError ("The condensed distance matrix must have "
123- "shape (n*(n-1)/2,)." )
129+ "shape (n*(n-1)/2,)." )
124130 elif len (s ) == 2 :
125131 # check it has correct size
126132 if (s [0 ] != n_samples ) or (s [1 ] != n_samples ):
127133 raise ValueError ("The distance matrix must be of shape "
128- "(n, n) where n is the number of samples." )
134+ "(n, n) where n is the number of samples." )
129135 # force zero diagonal and convert to condensed
130136 np .fill_diagonal (X , 0 )
131137 X = squareform (X )
132138 else :
133139 raise ValueError ("The distance matrix must be either a 1-D condensed "
134- "distance matrix of shape (n*(n-1)/2,) or a "
135- "2-D square distance matrix of shape (n, n)."
136- "where n is the number of documents."
137- "Got a distance matrix of shape %s" % str (s ))
140+ "distance matrix of shape (n*(n-1)/2,) or a "
141+ "2-D square distance matrix of shape (n, n)."
142+ "where n is the number of documents."
143+ "Got a distance matrix of shape %s" % str (s ))
138144
139145 # Make sure its entries are non-negative
140146 if np .any (X < 0 ):
141147 raise ValueError ("Distance matrix cannot contain negative values." )
142148
143- return X
149+ return X
0 commit comments