def test_image_video_kohonen(self):
- temp = get_temp_folder(__file__, "temp_graph_distance")
-
graph1 = [
("a", "b"),
("b", "c"),
@@ -70,31 +68,17 @@ def test_image_video_kohonen(self):
if distance is None:
raise AssertionError("expecting something different from None")
- outfile1 = os.path.join(temp, "unittest_GraphDistance4_sub1.png")
- outfile2 = os.path.join(temp, "unittest_GraphDistance4_sub2.png")
- outfilef = os.path.join(temp, "unittest_GraphDistance4_subf.png")
-
vertices, edges = graph1.draw_vertices_edges()
self.assertNotEmpty(vertices)
self.assertNotEmpty(edges)
- try:
- draw_graph_graphviz(vertices, edges, outfile1)
- except FileNotFoundError as e:
- if "No such file or directory: 'dot'" in str(e):
- return
- raise e
vertices, edges = graph2.draw_vertices_edges()
self.assertNotEmpty(vertices)
self.assertNotEmpty(edges)
- draw_graph_graphviz(vertices, edges, outfile2)
- self.assertTrue(os.path.exists(outfile2))
vertices, edges = graph.draw_vertices_edges()
self.assertNotEmpty(vertices)
self.assertNotEmpty(edges)
- draw_graph_graphviz(vertices, edges, outfilef)
- self.assertTrue(os.path.exists(outfilef))
def test_unittest_GraphDistance2(self):
graph1 = [
@@ -175,4 +159,4 @@ def test_unittest_common_paths(self):
if __name__ == "__main__":
- unittest.main()
+ unittest.main(verbosity=2)
diff --git a/_unittests/ut_graph/test_graphviz.py b/_unittests/ut_graph/test_graphviz.py
deleted file mode 100644
index 2c7f0599..00000000
--- a/_unittests/ut_graph/test_graphviz.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-@brief test log(time=2s)
-
-"""
-
-import os
-import unittest
-from pyquickhelper.pycode import get_temp_folder
-from mlstatpy.graph.graphviz_helper import draw_graph_graphviz
-
-
-class TestGraphviz(unittest.TestCase):
- def test_draw_graph_graphviz(self):
- temp = get_temp_folder(__file__, "temp_graphviz")
- fout = os.path.join(temp, "image.png")
-
- try:
- draw_graph_graphviz(
- [(1, "eee", "red")], [(1, 2, "blue"), (3, 4), (1, 3)], fout
- )
- except FileNotFoundError as e:
- if "No such file or directory: 'dot'" in str(e):
- return
- raise e
-
- self.assertTrue(os.path.exists(fout))
- self.assertTrue(os.path.exists(fout + ".gv"))
-
- def test_draw_graph_graphviz_no_image(self):
- try:
- res = draw_graph_graphviz(
- [(1, "eee", "red")], [(1, 2, "blue"), (3, 4), (1, 3)], image=None
- )
- except FileNotFoundError as e:
- if "No such file or directory: 'dot'" in str(e):
- return
- raise e
- self.assertIn('[label="eee"', res)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/_unittests/ut_image/test_binom.py b/_unittests/ut_image/test_binom.py
index b854b6b0..6af21706 100644
--- a/_unittests/ut_image/test_binom.py
+++ b/_unittests/ut_image/test_binom.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=38s)
-"""
import unittest
from mlstatpy.image.detection_segment import tabule_queue_binom
diff --git a/_unittests/ut_image/test_geometrie.py b/_unittests/ut_image/test_geometrie.py
index 1d80111e..a885483c 100644
--- a/_unittests/ut_image/test_geometrie.py
+++ b/_unittests/ut_image/test_geometrie.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=38s)
-"""
import unittest
import math
from mlstatpy.image.detection_segment import Point, Segment
diff --git a/_unittests/ut_image/test_random_image.py b/_unittests/ut_image/test_random_image.py
index fe4442b0..0e9a5189 100644
--- a/_unittests/ut_image/test_random_image.py
+++ b/_unittests/ut_image/test_random_image.py
@@ -1,11 +1,7 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=2s)
-"""
import os
import unittest
import numpy
-from pyquickhelper.pycode import ExtTestCase, get_temp_folder
+from mlstatpy.ext_test_case import ExtTestCase, get_temp_folder
from mlstatpy.image.detection_segment.random_image import (
random_noise_image,
random_segment_image,
@@ -43,7 +39,7 @@ def test_random_segment_image(self):
img3 = convert_PIL2array(pil2)
self.assertEqual(timg255, img3)
- for _ in range(0, 100):
+ for _ in range(100):
seg = random_segment_image(img, lmin=0.5, density=2.0)
self.assertGreater(seg["x1"], 0)
self.assertGreater(seg["y1"], 0)
diff --git a/_unittests/ut_image/test_segments.py b/_unittests/ut_image/test_segments.py
index 084eccac..aa19c8f0 100644
--- a/_unittests/ut_image/test_segments.py
+++ b/_unittests/ut_image/test_segments.py
@@ -1,11 +1,7 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=10s)
-"""
import os
import unittest
import math
-from pyquickhelper.pycode import ExtTestCase, get_temp_folder
+from mlstatpy.ext_test_case import ExtTestCase, get_temp_folder
from mlstatpy.image.detection_segment.geometrie import Point
from mlstatpy.image.detection_segment.detection_segment_segangle import SegmentBord
from mlstatpy.image.detection_segment.detection_segment import (
@@ -28,7 +24,7 @@ def test_segment_bord(self):
res = []
while n:
res.append(s.copy())
- n = s.next() # pylint: disable=E1102
+ n = s.next()
self.assertEqual(len(res), 279)
self.assertEqual(res[-1].a, Point(0, 3))
self.assertEqual(res[-1].b, Point(7, 2))
@@ -55,7 +51,7 @@ def attendre_clic(screen):
reste = False
break
- import pygame # pylint: disable=C0415
+ import pygame
pygame.init()
screen = pygame.display.set_mode((xx * 4, yy * 4))
@@ -98,7 +94,7 @@ def attendre_clic(screen):
pygame.draw.line(screen, couleur[c % len(couleur)], a, b)
pygame.display.flip()
- n = s.next() # pylint: disable=E1102
+ n = s.next()
if angle != s.angle:
if TestSegments.visual and __name__ == "__main__":
print("changement angle = ", angle, " --> ", s.angle, " clic ", s)
@@ -127,7 +123,7 @@ def test_gradient_profile(self):
os.path.abspath(os.path.join(os.path.dirname(rootfile), ".."))
)
_, res = self.profile(
- lambda: _calcule_gradient(img, color=0), # pylint: disable=W0632
+ lambda: _calcule_gradient(img, color=0),
rootrem=rootrem,
)
short = "\n".join(res.split("\n")[:15])
@@ -158,7 +154,7 @@ def test_segment_detection_profile(self):
os.path.abspath(os.path.join(os.path.dirname(rootfile), ".."))
)
_, res = self.profile(
- lambda: detect_segments(img, stop=100), # pylint: disable=W0632
+ lambda: detect_segments(img, stop=100),
rootrem=rootrem,
)
short = "\n".join(res.split("\n")[:25])
diff --git a/_unittests/ut_ml/test_logreg.py b/_unittests/ut_ml/test_logreg.py
index eba5043b..f192c172 100644
--- a/_unittests/ut_ml/test_logreg.py
+++ b/_unittests/ut_ml/test_logreg.py
@@ -1,10 +1,5 @@
-"""
-@brief test log(time=2s)
-@author Xavier Dupre
-"""
-
import unittest
-from pyquickhelper.pycode import ExtTestCase
+from mlstatpy.ext_test_case import ExtTestCase
from mlstatpy.ml.logreg import criteria, criteria2, random_set_1d, plot_ds
diff --git a/_unittests/ut_ml/test_matrices.py b/_unittests/ut_ml/test_matrices.py
index c5428f98..32af670f 100644
--- a/_unittests/ut_ml/test_matrices.py
+++ b/_unittests/ut_ml/test_matrices.py
@@ -1,11 +1,7 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=2s)
-"""
import unittest
import numpy
import numpy.random as rnd
-from pyquickhelper.pycode import ExtTestCase
+from mlstatpy.ext_test_case import ExtTestCase
from mlstatpy.ml.matrices import (
gram_schmidt,
linear_regression,
@@ -30,7 +26,7 @@ def test_gram_schmidt(self):
res2, change2 = gram_schmidt(mat, change=True)
self.assertEqual(res, res2)
res3 = change2 @ mat
- self.assertEqual(res3, res2)
+ self.assertEqual(res3, res2, atol=1e-8)
mat1 = numpy.array([[1, 0, 0], [0, 0, 1]], dtype=float)
res = gram_schmidt(mat1)
@@ -62,7 +58,7 @@ def test_linear_regression(self):
y = numpy.array([1, 1.3, 3.9])
b1 = linear_regression(X, y)
b2 = linear_regression(X, y, algo="gram")
- self.assertEqualArray(b1, b2)
+ self.assertEqualArray(b1, b2, atol=1e-8)
def test_linear_regression_qr(self):
X = numpy.array([[1, 0.5, 0], [0, 0.4, 2]], dtype=float).T
@@ -70,8 +66,8 @@ def test_linear_regression_qr(self):
b1 = linear_regression(X, y)
b3 = linear_regression(X, y, algo="gram")
b2 = linear_regression(X, y, algo="qr")
- self.assertEqualArray(b1, b3)
- self.assertEqualArray(b1, b2)
+ self.assertEqualArray(b1, b3, atol=1e-8)
+ self.assertEqualArray(b1, b2, atol=1e-8)
def test_linear_regression_qr3(self):
X = numpy.array([[1, 0.5, 0], [0, 0.4, 2], [0, 0.4, 2.1]], dtype=float).T
@@ -79,21 +75,19 @@ def test_linear_regression_qr3(self):
b1 = linear_regression(X, y)
b3 = linear_regression(X, y, algo="gram")
b2 = linear_regression(X, y, algo="qr")
- self.assertEqualArray(b1, b3)
- self.assertEqualArray(b1, b2)
+ self.assertEqualArray(b1, b3, atol=1e-8)
+ self.assertEqualArray(b1, b2, atol=1e-8)
def test_dim_lin_reg(self):
X = rnd.randn(100, 7)
eps = rnd.randn(100, 1) / 3
- y = (
- X.sum(axis=1).reshape((X.shape[0], 1)) + eps # pylint: disable=E1101
- ) # pylint: disable=E1101
+ y = X.sum(axis=1).reshape((X.shape[0], 1)) + eps
y = y.ravel()
b1 = linear_regression(X, y)
b3 = linear_regression(X, y, algo="gram")
b2 = linear_regression(X, y, algo="qr")
- self.assertEqualArray(b1.ravel(), b3.ravel())
- self.assertEqualArray(b1.ravel(), b2.ravel())
+ self.assertEqualArray(b1.ravel(), b3.ravel(), atol=1e-8)
+ self.assertEqualArray(b1.ravel(), b2.ravel(), atol=1e-8)
def test_inner_code(self):
X = numpy.array(
@@ -103,9 +97,9 @@ def test_inner_code(self):
Xt = X.T
Tt = numpy.empty(Xt.shape)
Pt = numpy.identity(X.shape[1])
- for i in range(0, Xt.shape[0]):
+ for i in range(Xt.shape[0]):
Tt[i, :] = Xt[i, :]
- for j in range(0, i):
+ for j in range(i):
d = numpy.dot(Tt[j, :], Xt[i, :])
Tt[i, :] -= Tt[j, :] * d
Pt[i, :] -= Pt[j, :] * d
@@ -118,12 +112,12 @@ def test_inner_code(self):
self.assertEqual(Tt.shape, Xt.shape)
self.assertEqual(Pt.shape, (X.shape[1], X.shape[1]))
_Tt = Pt @ Xt
- self.assertEqualArray(_Tt, Tt)
+ self.assertEqualArray(_Tt, Tt, atol=1e-8)
self.assertEqualArray(Tt @ Tt.T, numpy.identity(Tt.shape[0]), atol=1e-10)
beta1 = numpy.linalg.inv(Xt @ X) @ Xt @ y
beta2 = Tt @ y @ Pt
- self.assertEqualArray(beta1, beta2)
+ self.assertEqualArray(beta1, beta2, atol=1e-8)
def test_streaming_gram_schmidt(self):
X0 = numpy.array(
@@ -150,7 +144,7 @@ def test_streaming_gram_schmidt(self):
self.assertEqualArray(t_.T @ t_, idd, atol=1e-10)
algo2 = []
self.assertRaise(
- lambda: list(streaming_gram_schmidt(X)), # pylint: disable=W0640
+ lambda X=X: list(streaming_gram_schmidt(X)),
RuntimeError,
)
for i, p in enumerate(streaming_gram_schmidt(Xt)):
@@ -183,15 +177,13 @@ def test_streaming_linear_regression(self):
algo1.append(bk)
algo2 = []
self.assertRaise(
- lambda: list(
- streaming_linear_regression(X.T, y)
- ), # pylint: disable=W0640
+ lambda X=X, y=y: list(streaming_linear_regression(X.T, y)),
RuntimeError,
)
for i, bk in enumerate(streaming_linear_regression(X, y)):
algo2.append(bk.copy())
self.assertNotEmpty(bk)
- self.assertEqualArray(algo1[i], algo2[i])
+ self.assertEqualArray(algo1[i], algo2[i], atol=1e-8)
self.assertEqual(len(algo1), len(algo2))
def test_streaming_linear_regression_graph_schmidt(self):
@@ -215,32 +207,24 @@ def test_streaming_linear_regression_graph_schmidt(self):
algo1.append(bk)
algo2 = []
self.assertRaise(
- lambda: list(
- streaming_linear_regression_gram_schmidt(X.T, y)
- ), # pylint: disable=W0640
+ lambda X=X, y=y: list(streaming_linear_regression_gram_schmidt(X.T, y)),
RuntimeError,
)
for i, bk in enumerate(streaming_linear_regression_gram_schmidt(X, y)):
algo2.append(bk.copy())
self.assertNotEmpty(bk)
- self.assertEqualArray(algo1[i], algo2[i])
+ self.assertEqualArray(algo1[i], algo2[i], atol=1e-8)
self.assertEqual(len(algo1), len(algo2))
def test_profile(self):
N = 1000
X = rnd.randn(N, 10)
eps = rnd.randn(N, 1) / 3
- y = (
- X.sum(axis=1).reshape((X.shape[0], 1)) + eps # pylint: disable=E1101
- ) # pylint: disable=E1101
+ y = X.sum(axis=1).reshape((X.shape[0], 1)) + eps
y = y.ravel()
res = self.profile(lambda: list(streaming_linear_regression_gram_schmidt(X, y)))
- if __name__ == "__main__":
- print("***", res[1])
self.assertIn("streaming", res[1])
res = self.profile(lambda: list(streaming_linear_regression(X, y)))
- if __name__ == "__main__":
- print("***", res[1])
self.assertIn("streaming", res[1])
def test_norm2(self):
@@ -250,4 +234,4 @@ def test_norm2(self):
if __name__ == "__main__":
- unittest.main()
+ unittest.main(verbosity=2)
diff --git a/_unittests/ut_ml/test_neural_tree.py b/_unittests/ut_ml/test_neural_tree.py
index 0199176e..1cc7d330 100644
--- a/_unittests/ut_ml/test_neural_tree.py
+++ b/_unittests/ut_ml/test_neural_tree.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=23s)
-"""
import io
import unittest
import pickle
@@ -9,7 +5,7 @@
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
from sklearn.datasets import load_iris
from sklearn.tree import export_text
-from pyquickhelper.pycode import ExtTestCase
+from mlstatpy.ext_test_case import ExtTestCase, ignore_warnings
from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
from mlstatpy.ml.neural_tree import (
NeuralTreeNode,
@@ -27,11 +23,7 @@ def test_neural_tree_node(self):
res = neu.predict(numpy.array([4, 5]))
self.assertEqual(res, 5.5)
st = repr(neu)
- self.assertEqual(
- "NeuralTreeNode(weights=array([0., 1.]), "
- "bias=0.5, activation='identity')",
- st,
- )
+ self.assertIn("NeuralTreeNode(weights=array([0., 1.]), ", st)
st = io.BytesIO()
pickle.dump(neu, st)
st = io.BytesIO(st.getvalue())
@@ -43,7 +35,7 @@ def test_neural_tree_network(self):
X = numpy.random.randn(2, 3)
got = net.predict(X)
exp = X.sum(axis=1)
- self.assertEqual(exp.reshape((-1, 1)), got[:, -1:])
+ self.assertEqualArray(exp.reshape((-1, 1)), got[:, -1:])
rep = repr(net)
self.assertEqual(rep, "NeuralTreeNet(3)")
net.clear()
@@ -143,7 +135,7 @@ def test_neural_tree_network_training_weights(self):
w = net.training_weights
self.assertEqual(w.shape, (6,))
self.assertEqual(w[0], 0)
- self.assertEqualArray(w[1:4], [1, 1, 1])
+ self.assertEqualArray(w[1:4], numpy.array([1, 1, 1], dtype=float))
delta = numpy.arange(6) - 0.5
net.update_training_weights(delta)
w2 = net.training_weights
@@ -192,7 +184,7 @@ def test_gradients(self):
with self.subTest(act=act):
neu = NeuralTreeNode(w, bias=b, activation=act)
pred = neu.predict(X)
- self.assertAlmostEqual(numpy.sum(pred), 1.0)
+ self.assertAlmostEqual(numpy.sum(pred), 1.0, atol=1e-10)
self.assertEqual(pred.shape, (2,))
grad = neu.gradient_backward(g, X)
self.assertEqual(grad.shape, (2, 4))
@@ -204,7 +196,7 @@ def test_gradients(self):
self.assertEqualArray(w0, numpy.zeros(w0.shape))
def test_optim_regression(self):
- state = numpy.random.RandomState(seed=0) # pylint: disable=E1101
+ state = numpy.random.RandomState(seed=0)
X = numpy.abs(state.randn(10, 2))
w0 = state.randn(3)
w1 = numpy.array([-0.5, 0.8, -0.6])
@@ -283,8 +275,8 @@ def test_label_class_to_softmax_output(self):
)
soft_y = label_class_to_softmax_output(y_label)
self.assertEqual(soft_y.shape, (4, 2))
- self.assertEqual(soft_y[:, 1], y_label)
- self.assertEqual(soft_y[:, 0], 1 - y_label)
+ self.assertEqualArray(soft_y[:, 1], y_label.astype(float))
+ self.assertEqualArray(soft_y[:, 0], 1 - y_label.astype(float))
def test_neural_net_gradient(self):
X = numpy.arange(8).astype(numpy.float64).reshape((-1, 2))
@@ -321,6 +313,7 @@ def test_neural_net_gradient_regression(self):
self.assertEqualArray(loss1, loss2, atol=1e-5)
self.assertEqualArray(grad1, grad2, atol=1e-5)
+ @ignore_warnings(DeprecationWarning)
def test_neural_net_gradient_regression_2(self):
X = numpy.abs(numpy.random.randn(10, 2))
w1 = numpy.array([-0.5, 0.8, -0.6])
@@ -352,16 +345,17 @@ def test_neural_net_gradient_regression_2(self):
pred2 = net.predict(X)
loss2 = net.loss(X, y)
- self.assertEqualArray(pred1, pred2[:, -1])
+ self.assertEqualArray(pred1, pred2[:, -1], atol=1e-10)
self.assertEqualArray(pred2[:, -2], pred2[:, -1])
self.assertEqualArray(pred2[:, 2], pred2[:, 3])
self.assertEqualArray(loss1, loss2, atol=1e-7)
- for p in range(0, 5):
+ for p in range(5):
grad1 = neu.gradient(X[p], y[p])
grad2 = net.gradient(X[p], y[p])
self.assertEqualArray(grad1, grad2[:3], atol=1e-7)
+ @ignore_warnings(DeprecationWarning)
def test_neural_net_gradient_regression_2_h2(self):
X = numpy.abs(numpy.random.randn(10, 2))
w1 = numpy.array([-0.5, 0.8, -0.6])
@@ -414,11 +408,11 @@ def test_neural_net_gradient_regression_2_h2(self):
pred2 = net.predict(X)
loss2 = net.loss(X, y)
- self.assertEqualArray(pred1, pred2[:, -1])
- self.assertEqualArray(pred2[:, 2], pred2[:, -1])
+ self.assertEqualArray(pred1, pred2[:, -1], atol=1e-8)
+ self.assertEqualArray(pred2[:, 2], pred2[:, -1], atol=1e-10)
self.assertEqualArray(loss1, loss2, atol=1e-7)
- for p in range(0, 5):
+ for p in range(5):
grad1 = neu.gradient(X[p], y[p])
grad2 = net.gradient(X[p], y[p])
self.assertEqualArray(grad1, grad2[:3], atol=1e-7)
@@ -488,6 +482,7 @@ def test_shape_dim2(self):
loss = neu.loss(X, numpy.zeros((X.shape[0], 1), dtype=numpy.float64))
self.assertEqual(loss.shape, (10, 2))
+ @ignore_warnings(DeprecationWarning)
def test_convert_compact(self):
X = numpy.arange(8).astype(numpy.float64).reshape((-1, 2))
y = ((X[:, 0] + X[:, 1] * 2) > 10).astype(numpy.int64)
@@ -589,8 +584,9 @@ def test_convert_compact_skl_onnx(self):
self.assertIn("Softmax(", text)
oinf = ReferenceEvaluator(onx)
got2 = oinf.run(None, {"X": x32})[0]
- self.assertEqualArray(exp[:, 1], got2, atol=1e-5)
+ self.assertEqualArray(exp[:, 1], got2.astype(float).ravel(), atol=1e-5)
+ @ignore_warnings(DeprecationWarning)
def test_convert_reg_compact(self):
X = numpy.arange(32).astype(numpy.float64).reshape((-1, 2))
y = (X[:, 0] + X[:, 1] * 2).astype(numpy.float64)
@@ -616,10 +612,11 @@ def test_convert_reg_compact(self):
self.assertNotEmpty(root)
exp = tree.predict(X)
got = root.predict(X)
- self.assertEqualArray(exp, got[:, -1], decimal=6)
+ self.assertEqualArray(exp, got[:, -1], atol=1e-6)
dot = root.to_dot()
self.assertIn("9 -> 17", dot)
+ @ignore_warnings(DeprecationWarning)
def test_convert_compact_skl_reg(self):
X = numpy.arange(8).astype(numpy.float64).reshape((-1, 2))
y = X[:, 0] + X[:, 1] * 2
@@ -630,12 +627,13 @@ def test_convert_compact_skl_reg(self):
exp = tree.predict(X)
got = root.predict(X)
self.assertEqual(exp.shape[0], got.shape[0])
- self.assertEqualArray(exp, got[:, -1])
+ self.assertEqualArray(exp, got[:, -1], atol=1e-7)
skl = NeuralTreeNetRegressor(root)
prob = skl.predict(X)
- self.assertEqualArray(exp, prob.ravel())
+ self.assertEqualArray(exp, prob.ravel(), atol=1e-7)
+ @ignore_warnings(DeprecationWarning)
def test_convert_compact_skl_fit_reg(self):
X = numpy.arange(8).astype(numpy.float64).reshape((-1, 2))
y = X[:, 0] + X[:, 1] * 2
@@ -646,8 +644,9 @@ def test_convert_compact_skl_fit_reg(self):
skl.fit(X, y)
exp = tree.predict(X)
got = skl.predict(X)
- self.assertEqualArray(exp, got.ravel())
+ self.assertEqualArray(exp, got.ravel(), atol=1e-7)
+ @ignore_warnings(DeprecationWarning)
def test_convert_compact_skl_onnx_reg(self):
from skl2onnx import to_onnx
from onnx.reference import ReferenceEvaluator
@@ -660,9 +659,9 @@ def test_convert_compact_skl_onnx_reg(self):
skl = NeuralTreeNetRegressor(root)
got = skl.predict(X)
exp = tree.predict(X)
- self.assertEqualArray(exp, got.ravel())
+ self.assertEqualArray(exp, got.ravel(), atol=1e-7)
dec = root.predict(X)
- self.assertEqualArray(exp, dec[:, -1])
+ self.assertEqualArray(exp, dec[:, -1], atol=1e-7)
x32 = X.astype(numpy.float32)
onx = to_onnx(skl, x32, target_opset=15)
@@ -671,9 +670,8 @@ def test_convert_compact_skl_onnx_reg(self):
self.assertNotIn("Softmax(", text)
oinf = ReferenceEvaluator(onx)
got2 = oinf.run(None, {"X": x32})[0]
- self.assertEqualArray(exp, got2.ravel())
+ self.assertEqualArray(exp, got2.ravel().astype(float))
if __name__ == "__main__":
- # TestNeuralTree().test_convert_reg_compact()
- unittest.main()
+ unittest.main(verbosity=2)
diff --git a/_unittests/ut_ml/test_nuage_points.py b/_unittests/ut_ml/test_nuage_points.py
index 9349ed1c..1b11dc1a 100644
--- a/_unittests/ut_ml/test_nuage_points.py
+++ b/_unittests/ut_ml/test_nuage_points.py
@@ -1,11 +1,7 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=1s)
-"""
import unittest
import numpy
from numpy.testing import assert_array_equal
-from pyquickhelper.pycode import ExtTestCase, ignore_warnings
+from mlstatpy.ext_test_case import ExtTestCase, ignore_warnings
from sklearn.neighbors import NearestNeighbors
from mlstatpy.ml.kppv import NuagePoints
from mlstatpy.ml.kppv_laesa import NuagePointsLaesa
diff --git a/_unittests/ut_ml/test_roc.py b/_unittests/ut_ml/test_roc.py
index dab10a8e..d6c82bea 100644
--- a/_unittests/ut_ml/test_roc.py
+++ b/_unittests/ut_ml/test_roc.py
@@ -1,29 +1,17 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=70s)
-"""
import os
import unittest
import random
-from pyquickhelper.loghelper import fLOG
-from pyquickhelper.pycode import (
- get_temp_folder,
- fix_tkinter_issues_virtualenv,
- ExtTestCase,
-)
+from mlstatpy.ext_test_case import get_temp_folder, ExtTestCase
from mlstatpy.ml.roc import ROC
class TestROC(ExtTestCase):
def test_roc(self):
- fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
-
- fix_tkinter_issues_virtualenv()
- import matplotlib.pyplot as plt # pylint: disable=C0415
+ import matplotlib.pyplot as plt
temp = get_temp_folder(__file__, "temp_roc")
- data = [random.random() for a in range(0, 1000)]
+ data = [random.random() for a in range(1000)]
data = [(x, 1 if x + random.random() / 3 > 0.7 else 0) for x in data]
test = ROC(y_true=[_[1] for _ in data], y_score=[_[0] for _ in data])
@@ -31,20 +19,18 @@ def test_roc(self):
self.assertNotEmpty(repr(test))
self.assertEqual(len(test), len(data))
test = ROC(df=data)
- fLOG(test.__str__()) # pylint: disable=C2801
+
roc = test.compute_roc_curve()
t = test.roc_intersect(roc, 0.2)
self.assertTrue(1 >= t >= 0)
conf = test.confusion()
- s = str(conf)
- fLOG(s)
+ str(conf)
+
self.assertEqual(conf.shape, (12, 5))
conf = test.confusion(score=0.5)
- fLOG(conf)
- self.assertEqual(conf.shape, (1, 5))
- fLOG("graph.............. PROBSCORE")
+ self.assertEqual(conf.shape, (1, 5))
fig, ax = plt.subplots()
ax = test.plot(0, ax=ax, curve=ROC.CurveType.PROBSCORE, thresholds=True)
@@ -58,8 +44,6 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_PROBSCORE_100_b10.png"))
- fLOG("graph.............. SKROC")
-
fig, ax = plt.subplots()
ax = test.plot(0, ax=ax, curve=ROC.CurveType.SKROC)
self.assertNotEmpty(ax)
@@ -70,8 +54,6 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_SKROC_100_b10.png"))
- fLOG("graph.............. RECPREC")
-
fig, ax = plt.subplots()
ax = test.plot(100, ax=ax, curve=ROC.CurveType.RECPREC)
self.assertNotEmpty(ax)
@@ -82,8 +64,6 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_RECPREC_100_b10.png"))
- fLOG("graph.............. SKROC True")
-
fig, ax = plt.subplots()
ax = test.plot(0, ax=ax, curve=ROC.CurveType.SKROC, thresholds=True)
self.assertNotEmpty(ax)
@@ -94,8 +74,6 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_SKROC_T_100_b10.png"))
- fLOG("graph.............. RECPREC True")
-
fig, ax = plt.subplots()
ax = test.plot(100, ax=ax, curve=ROC.CurveType.RECPREC, thresholds=True)
self.assertNotEmpty(ax)
@@ -108,8 +86,6 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_RECPREC_T_100_b10.png"))
- fLOG("graph.............. ERRREC")
-
fig, ax = plt.subplots()
ax = test.plot(100, ax=ax, curve=ROC.CurveType.ERRREC)
self.assertNotEmpty(ax)
@@ -120,8 +96,6 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_ERRREC_100_b10.png"))
- fLOG("graph.............. ROC")
-
fig, ax = plt.subplots()
self.assertRaise(
lambda: test.plot(10, ax=ax, label=["r10", "p10"], curve=ROC.CurveType.ROC),
@@ -145,27 +119,20 @@ def test_roc(self):
self.assertNotEmpty(ax)
fig.savefig(os.path.join(temp, "roc_ROC_100_b10.png"))
- fLOG("computing rate..............................")
values = test.auc_interval(alpha=0.1, bootstrap=20)
- for k, v in sorted(values.items()):
- fLOG(f"{k}={v}")
self.assertEqual(
list(sorted(values.keys())),
["auc", "interval", "max", "mean", "mediane", "min", "var"],
)
self.assertTrue(values["min"] <= values["auc"] <= values["max"])
- fLOG("computing rate..............................")
values = test.roc_intersect_interval(0.1, 100, bootstrap=50)
- for k, v in sorted(values.items()):
- fLOG(f"{k}={v}")
self.assertEqual(
list(sorted(values.keys())),
["interval", "max", "mean", "mediane", "min", "var", "y"],
)
self.assertTrue(values["min"] <= values["y"] <= values["max"])
plt.close("all")
- fLOG("end")
if __name__ == "__main__":
diff --git a/_unittests/ut_ml/test_voronoi.py b/_unittests/ut_ml/test_voronoi.py
index 991cd03c..162c523d 100644
--- a/_unittests/ut_ml/test_voronoi.py
+++ b/_unittests/ut_ml/test_voronoi.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=6s)
-"""
import math
import unittest
from io import StringIO
@@ -9,13 +5,10 @@
import numpy
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
-from pyquickhelper.pycode import ExtTestCase, add_missing_development_version
+from mlstatpy.ext_test_case import ExtTestCase
class TestVoronoi(ExtTestCase):
- def setUp(self):
- add_missing_development_version(["mlinsights"], __file__, hide=True)
-
def test_iris(self):
from mlstatpy.ml import voronoi_estimation_from_lr
@@ -43,7 +36,7 @@ def test_iris(self):
expected_values = numpy.array(
[[3.0, 4.137], [5.044, 0.281], [5.497, 0.184]]
)
- self.assertEqualArray(expected_values, points, decimal=2)
+ self.assertEqualArray(expected_values, points, atol=1e-2)
points = voronoi_estimation_from_lr(
clr.coef_, clr.intercept_, C, D, qr=True, verbose=True
@@ -52,7 +45,7 @@ def test_iris(self):
expected_values = numpy.array(
[[3.0, 4.137], [5.044, 0.281], [5.497, 0.184]]
)
- self.assertEqualArray(expected_values, points, decimal=2)
+ self.assertEqualArray(expected_values, points, atol=1e-2)
std = std.getvalue()
self.assertIn("[voronoi_estimation_from_lr] iter=", std)
@@ -79,7 +72,7 @@ def test_iris_dim4(self):
self.assertEqual(points.shape, (3, 4))
points2 = voronoi_estimation_from_lr(clr.coef_, clr.intercept_, C, D, qr=True)
self.assertEqual(points2.shape, (3, 4))
- self.assertEqualArray(points2, points2, decimal=5)
+ self.assertEqualArray(points2, points2, atol=1e-5)
def test_square(self):
from mlstatpy.ml.voronoi import voronoi_estimation_from_lr
@@ -87,8 +80,8 @@ def test_square(self):
Xs = []
Ys = []
n = 20
- for i in range(0, 4):
- for j in range(0, 3):
+ for i in range(4):
+ for j in range(3):
x1 = numpy.random.rand(n) + i * 1.1
x2 = numpy.random.rand(n) + j * 1.1
Xs.append(numpy.vstack([x1, x2]).T)
@@ -116,7 +109,7 @@ def test_hexa_scale(self):
for i in range(n):
for j in range(n):
dil = ((i + 1) ** 2 + (j + 1) ** 2) ** 0.6
- for _ in range(0, 20):
+ for _ in range(20):
x = i + j * math.cos(a)
y = j * math.sin(a)
points.append([x * dil, y * dil])
@@ -124,7 +117,7 @@ def test_hexa_scale(self):
mi = 0.5
for r in [0.1, 0.3, mi]:
nb = 6 if r == mi else 12
- for k2 in range(0, nb):
+ for k2 in range(nb):
ang = math.pi * 2 / nb * k2 + math.pi / 6
x = i + j * math.cos(a) + r * math.cos(ang)
y = j * math.sin(a) + r * math.sin(ang)
diff --git a/_unittests/ut_nlp/test_completion.py b/_unittests/ut_nlp/test_completion.py
index cb289a8f..742677ce 100644
--- a/_unittests/ut_nlp/test_completion.py
+++ b/_unittests/ut_nlp/test_completion.py
@@ -1,12 +1,7 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=3s)
-"""
-# pylint: disable=W0719
import os
import unittest
import itertools
-from pyquickhelper.pycode import ExtTestCase
+from mlstatpy.ext_test_case import ExtTestCase
from mlstatpy.nlp.completion import CompletionTrieNode
from mlstatpy.data.wikipedia import normalize_wiki_text, enumerate_titles
from mlstatpy.nlp.normalize import remove_diacritics
@@ -195,12 +190,12 @@ def cmks(trie):
raise AssertionError(f"gmks={gmks} gmksd={gmksd}")
if gmksd == 0:
i = 0
- for node in trie:
+ for _node in trie:
# print(node.value, "--", node.stat.str_mks())
if i > 20:
break
i += 1
- assert False
+ raise AssertionError("should not happen")
trie = CompletionTrieNode.build(titles)
nb2, gmks2, gmksd2, size = cmks(trie)
diff --git a/_unittests/ut_nlp/test_completion_longer.py b/_unittests/ut_nlp/test_completion_longer.py
index 4ea0cee9..3e7e77b7 100644
--- a/_unittests/ut_nlp/test_completion_longer.py
+++ b/_unittests/ut_nlp/test_completion_longer.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=16s)
-"""
import os
import unittest
from mlstatpy.nlp.completion import CompletionTrieNode
diff --git a/_unittests/ut_nlp/test_completion_mks.py b/_unittests/ut_nlp/test_completion_mks.py
index 2d74aa37..9eb42f31 100644
--- a/_unittests/ut_nlp/test_completion_mks.py
+++ b/_unittests/ut_nlp/test_completion_mks.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=3s)
-"""
import os
import unittest
from mlstatpy.nlp.completion import CompletionTrieNode
diff --git a/_unittests/ut_nlp/test_completion_profiling.py b/_unittests/ut_nlp/test_completion_profiling.py
index 2718878a..ade968f9 100644
--- a/_unittests/ut_nlp/test_completion_profiling.py
+++ b/_unittests/ut_nlp/test_completion_profiling.py
@@ -1,16 +1,14 @@
-# -*- coding: utf-8 -*-
"""
-@brief test log(time=2s)
-
https://dumps.wikimedia.org/frwiki/latest/frwiki-latest-all-titles.gz
https://dumps.wikimedia.org/frwiki/latest/frwiki-latest-all-titles-in-ns0.gz
"""
+
import os
import unittest
import cProfile
import pstats
import io
-from pyquickhelper.pycode import get_temp_folder
+from mlstatpy.ext_test_case import get_temp_folder
from mlstatpy.nlp.completion import CompletionTrieNode
@@ -52,9 +50,7 @@ def prof(n, show):
res = s.getvalue().replace(rem, "")
if show:
print(res)
- with open(
- os.path.join(temp, "profiling%d.txt" % n), "w"
- ) as f: # pylint: disable=W1514
+ with open(os.path.join(temp, "profiling%d.txt" % n), "w") as f:
f.write(res)
prof(1, show=False)
diff --git a/_unittests/ut_nlp/test_completion_simple.py b/_unittests/ut_nlp/test_completion_simple.py
index f6917913..6cd069cf 100644
--- a/_unittests/ut_nlp/test_completion_simple.py
+++ b/_unittests/ut_nlp/test_completion_simple.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=3s)
-"""
import os
import unittest
import itertools
diff --git a/_unittests/ut_nlp/test_completion_simple_optim.py b/_unittests/ut_nlp/test_completion_simple_optim.py
index c7f62764..f1e38a48 100644
--- a/_unittests/ut_nlp/test_completion_simple_optim.py
+++ b/_unittests/ut_nlp/test_completion_simple_optim.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=3s)
-"""
import unittest
from mlstatpy.nlp.completion_simple import CompletionSystem
@@ -21,7 +17,7 @@ def test_build_trie_simple(self):
cset = CompletionSystem(comp)
cset.compute_metrics()
queries = [(q, 1) for q in comp]
- for el, found in cset.enumerate_test_metric(queries):
+ for _el, found in cset.enumerate_test_metric(queries):
# print(el, found)
assert found is not None
res = cset.test_metric(queries)
diff --git a/_unittests/ut_optim/test_optim.py b/_unittests/ut_optim/test_optim.py
index 2fe5f98b..b00f7375 100644
--- a/_unittests/ut_optim/test_optim.py
+++ b/_unittests/ut_optim/test_optim.py
@@ -6,7 +6,7 @@
from contextlib import redirect_stdout
import unittest
import numpy
-from pyquickhelper.pycode import ExtTestCase
+from mlstatpy.ext_test_case import ExtTestCase
from mlstatpy.optim import SGDOptimizer
@@ -99,7 +99,7 @@ def test_sgd_optimizer_l1l2(self):
def test_sgd_optimizer_raise(self):
coef = numpy.array([0.5, 0.6, 0.7])
- rs = numpy.random.RandomState(seed=0) # pylint: disable=E1101
+ rs = numpy.random.RandomState(seed=0)
X = rs.randn(10, 3)
y = X @ coef
diff --git a/_unittests/ut_run_long/test_LONG_completion.py b/_unittests/ut_run_long/test_LONG_completion.py
deleted file mode 100644
index 408d3e6e..00000000
--- a/_unittests/ut_run_long/test_LONG_completion.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=33s)
-"""
-import os
-import unittest
-from pyquickhelper.loghelper import fLOG, CustomLog
-from pyquickhelper.pycode import get_temp_folder
-from mlstatpy.nlp.completion import CompletionTrieNode
-
-
-class TestLONGCompletion(unittest.TestCase):
- def test_build_dynamic_trie_mks_min(self):
- fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
-
- data = os.path.join(
- os.path.abspath(os.path.dirname(__file__)), "data", "sample20000.txt"
- )
- with open(data, "r", encoding="utf-8") as f:
- lines = [_.strip("\n\r\t ") for _ in f.readlines()]
- queries = [(None, _) for _ in lines]
- temp = get_temp_folder(__file__, "temp_build_dynamic_trie_mks_min")
- clog = CustomLog(temp)
- clog("build trie")
- trie = CompletionTrieNode.build(queries)
- fLOG(
- len(queries),
- len(set(_[1] for _ in queries)),
- len(list(trie.leaves())),
- len(set(trie.leaves())),
- )
-
- self.assertTrue("Cannes 2005" in set(_[1] for _ in queries))
- self.assertTrue("Cannes 2005" in set(_.value for _ in trie.leaves()))
-
- clog("precompute")
- trie.precompute_stat()
- clog("update")
- trie.update_stat_dynamic()
- clog("loop")
- fLOG("loop")
- for i, q in enumerate(queries):
- if i % 1000 == 0:
- clog(i)
- fLOG(i)
- leave = trie.find(q[1])
- if leave.stat is None:
- raise AssertionError(f"None for {leave}")
-
- self.assertTrue(hasattr(leave, "stat"))
- self.assertTrue(hasattr(leave.stat, "mks0"))
- self.assertTrue(hasattr(leave.stat, "mks1"))
-
- sug = leave.all_mks_completions()
- nb_ = [
- (a.value, len([s.value for _, s in b if s.value == q[1]]))
- for a, b in sug
- ]
- nbf_ = [(a.value, len(b)) for a, b in sug]
- nb = sum(_[1] for _ in nb_)
- mnb = max(_[1] for _ in nbf_)
- if nb == 0 and len(q[1]) > 10:
- info = f"nb={nb} mnb={mnb} q='{q[1]}'"
- st = leave.stat.str_mks()
- text = leave.str_all_completions()
- text2 = leave.str_all_completions(use_precompute=False)
- raise AssertionError(
- f"{info}\n---\nleave='{leave.value}'\n{st}\n---\n{text}\n---\n{text2}"
- )
-
- mk1 = trie.min_keystroke0(leave.value)
- try:
- mk = trie.min_dynamic_keystroke(leave.value)
- mk2 = trie.min_dynamic_keystroke2(leave.value)
- except Exception as e:
- raise RuntimeError(
- f"{id(trie)}-{id(leave)}-{str(leave)}-{leave.leave}"
- ) from e
-
- if mk[0] > mk1[0]:
- st = leave.stat.str_mks()
- text = leave.str_all_completions()
- text2 = leave.str_all_completions(use_precompute=False)
- raise RuntimeError(
- "weird {0} > {1} -- leave='{2}'\n{3}\n---\n"
- "{4}\n---\n{5}".format(mk, mk1, leave.value, st, text, text2)
- )
- if mk2[0] < mk[0]:
- st = leave.stat.str_mks()
- text = leave.str_all_completions()
- text2 = leave.str_all_completions(use_precompute=False)
- raise RuntimeError(
- "weird {0} > {1} -- leave='{2}'\n{3}\n---\n{4}\n---\n{5}".format(
- mk, mk2, leave.value, st, text, text2
- )
- )
- clog("end")
- fLOG("end")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/_unittests/ut_run_long/test_LONG_run_notebooks_dsgarden.py b/_unittests/ut_run_long/test_LONG_run_notebooks_dsgarden.py
deleted file mode 100644
index d14e3ca6..00000000
--- a/_unittests/ut_run_long/test_LONG_run_notebooks_dsgarden.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=33s)
-"""
-import os
-import unittest
-from pyquickhelper.pycode import get_temp_folder
-from pyquickhelper.ipythonhelper import (
- execute_notebook_list,
- execute_notebook_list_finalize_ut,
-)
-import mlstatpy
-
-
-class TestRunNotebooksDsGarden(unittest.TestCase):
- def test_run_notebook(self):
- temp = get_temp_folder(__file__, "temp_run_notebooks_dsgarden")
-
- # selection of notebooks
- fnb = os.path.normpath(
- os.path.join(
- os.path.abspath(os.path.dirname(__file__)),
- "..",
- "..",
- "_doc",
- "notebooks",
- "dsgarden",
- )
- )
- keepnote = []
- for f in os.listdir(fnb):
- if os.path.splitext(f)[-1] == ".ipynb" and "long" not in f:
- keepnote.append(os.path.join(fnb, f))
- self.assertTrue(len(keepnote) > 0)
-
- # function to tell that a can be run
- def valid(cell):
- if "open_html_form" in cell:
- return False
- if "open_window_params" in cell:
- return False
-            if '<div style="position:absolute' in cell:
-                return False
-            return True
-
-        thismodule = mlstatpy
-
- import pyquickhelper # pylint: disable=C0415
- import jyquickhelper # pylint: disable=C0415
- import pyensae # pylint: disable=C0415
-
- add_path = get_additional_paths(
- [jyquickhelper, pyquickhelper, pyensae, thismodule]
- )
- res = execute_notebook_list(
- temp, keepnote, additional_path=add_path, valid=valid
- )
- execute_notebook_list_finalize_ut(res, dump=thismodule)
-
- def test_notebook_benchmark(self):
- self.a_test_notebook_runner("benchmark", "ml")
-
- def test_notebook_logreg_voronoi(self):
- self.a_test_notebook_runner("logreg_voronoi", "ml")
-
- def test_notebook_mf_acp(self):
- self.a_test_notebook_runner("mf_acp", "ml")
-
- def test_notebook_neural_tree(self):
- self.a_test_notebook_runner("neural_tree", "ml")
-
- def test_notebook_piecewise_linear_regression(self):
- self.a_test_notebook_runner("piecewise_linear_regression", "ml")
-
- def test_notebook_regression_no_inversion(self):
- self.a_test_notebook_runner("regression_no_inversion", "ml")
-
- def test_notebook_valeurs_manquantes_mf(self):
- self.a_test_notebook_runner("valeurs_manquantes_mf", "ml")
-
- def test_notebook_reseau_neurones(self):
- self.a_test_notebook_runner("reseau_neurones", "ml")
-
- def test_notebook_survival(self):
- self.a_test_notebook_runner("survival", "ml")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/_unittests/ut_run_long/test_LONG_run_notebooks_nlp.py b/_unittests/ut_run_long/test_LONG_run_notebooks_nlp.py
deleted file mode 100644
index 57b982f8..00000000
--- a/_unittests/ut_run_long/test_LONG_run_notebooks_nlp.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@brief test log(time=571s)
-"""
-import os
-import unittest
-from pyquickhelper.loghelper import CustomLog
-from pyquickhelper.pycode import get_temp_folder, add_missing_development_version
-from pyquickhelper.ipythonhelper import (
- execute_notebook_list,
- execute_notebook_list_finalize_ut,
-)
-import mlstatpy
-
-
-class TestLONGRunNotebooksNLP(unittest.TestCase):
- def setUp(self):
- add_missing_development_version(
- ["pymyinstall", "pyensae", "pymmails", "jyquickhelper"], __file__, hide=True
- )
-
- def test_long_run_notebook(self):
- temp = get_temp_folder(__file__, "temp_run_notebooks_nlp")
-
- # selection of notebooks
- fnb = os.path.normpath(
- os.path.join(
- os.path.abspath(os.path.dirname(__file__)),
- "..",
- "..",
- "_doc",
- "notebooks",
- "nlp",
- )
- )
- keepnote = []
- for f in os.listdir(fnb):
- if os.path.splitext(f)[-1] == ".ipynb" and "_long" in f:
- keepnote.append(os.path.join(fnb, f))
-
- # function to tell that a can be run
- def valid(cell):
- if "open_html_form" in cell:
- return False
- if "open_html_form" in cell:
- return False
- if (
- "[50000, 100000, 200000, 500000, 500000, 1000000, 2000000, None]"
- in cell
- ):
- return False
-            if '<div style="position:absolute' in cell:
-                return False
-            return True
diff --git a/_unittests/ut_xrun_doc/test_documentation_examples.py b/_unittests/ut_xrun_doc/test_documentation_examples.py
new file mode 100644
--- /dev/null
+++ b/_unittests/ut_xrun_doc/test_documentation_examples.py
@@ -0,0 +1,98 @@
+import unittest
+import os
+import sys
+import importlib
+import subprocess
+import time
+from mlstatpy import __file__ as mlstatpy_file
+from mlstatpy.ext_test_case import ExtTestCase
+
+VERBOSE = 0
+ROOT = os.path.realpath(os.path.abspath(os.path.join(mlstatpy_file, "..", "..")))
+
+
+def import_source(module_file_path, module_name):
+    if not os.path.exists(module_file_path):
+        raise FileNotFoundError(module_file_path)
+    module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
+    if module_spec is None:
+        raise FileNotFoundError(
+            f"Unable to find {module_name!r} in {module_file_path!r}."
+        )
+    module = importlib.util.module_from_spec(module_spec)
+    return module_spec.loader.exec_module(module)
+
+
+class TestDocumentationExamples(ExtTestCase):
+    def run_test(self, fold: str, name: str, verbose=0) -> int:
+ ppath = os.environ.get("PYTHONPATH", "")
+ if len(ppath) == 0:
+ os.environ["PYTHONPATH"] = ROOT
+ elif ROOT not in ppath:
+ sep = ";" if sys.platform == "win32" else ":"
+ os.environ["PYTHONPATH"] = ppath + sep + ROOT
+ perf = time.perf_counter()
+ try:
+ mod = import_source(fold, os.path.splitext(name)[0])
+ assert mod is not None
+ except FileNotFoundError:
+ # try another way
+ cmds = [sys.executable, "-u", os.path.join(fold, name)]
+ p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ res = p.communicate()
+ out, err = res
+ st = err.decode("ascii", errors="ignore")
+ if "No such file or directory" in st:
+ raise FileNotFoundError(st) # noqa: B904
+ if len(st) > 0 and "Traceback" in st:
+ if '"dot" not found in path.' in st:
+ # dot not installed, this part
+ # is tested in onnx framework
+ if verbose:
+ print(f"failed: {name!r} due to missing dot.")
+ return -1
+ raise AssertionError( # noqa: B904
+ f"Example {name!r} (cmd: {cmds!r} - "
+ f"exec_prefix={sys.exec_prefix!r}) "
+ f"failed due to\n{st}"
+ )
+ dt = time.perf_counter() - perf
+ if verbose:
+ print(f"{dt:.3f}: run {name!r}")
+ return 1
+
+ @classmethod
+ def add_test_methods(cls):
+ this = os.path.abspath(os.path.dirname(__file__))
+ folds = [
+ os.path.normpath(os.path.join(this, "..", "..", "_doc", "examples")),
+ ]
+ for fold in folds:
+ found = os.listdir(fold)
+ for name in found:
+ if name.startswith("plot_") and name.endswith(".py"):
+ short_name = os.path.split(os.path.splitext(name)[0])[-1]
+
+ if sys.platform == "win32" and (
+ "protobuf" in name or "td_note_2021" in name
+ ):
+
+ @unittest.skip("notebook with questions or issues with windows")
+ def _test_(self, name=name, fold=fold):
+ res = self.run_test(fold, name, verbose=VERBOSE)
+ self.assertIn(res, (-1, 1))
+
+ else:
+
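+                        # keyword defaults bind the current loop values of
+                        # name and fold, so every generated test method
+                        # runs its own example file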
+ def _test_(self, name=name, fold=fold):
+ res = self.run_test(fold, name, verbose=VERBOSE)
+ self.assertIn(res, (-1, 1))
+
+ setattr(cls, f"test_{short_name}", _test_)
+
+
+TestDocumentationExamples.add_test_methods()
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_documentation_notebook.py b/_unittests/ut_xrun_doc/test_documentation_notebook.py
new file mode 100644
index 00000000..de2e5872
--- /dev/null
+++ b/_unittests/ut_xrun_doc/test_documentation_notebook.py
@@ -0,0 +1,142 @@
+import unittest
+import os
+import sys
+import importlib
+import subprocess
+import time
+import warnings
+from nbconvert import PythonExporter
+from mlstatpy import __file__ as mlstatpy_file
+from mlstatpy.ext_test_case import ExtTestCase
+
+VERBOSE = 0
+ROOT = os.path.realpath(os.path.abspath(os.path.join(mlstatpy_file, "..", "..")))
+
+
+def import_source(module_file_path, module_name):
+ if not os.path.exists(module_file_path):
+ raise FileNotFoundError(module_file_path)
+ module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
+ if module_spec is None:
+ raise RuntimeError(
+ f"Unable to find or execute {module_name!r} in {module_file_path!r}."
+ )
+ module = importlib.util.module_from_spec(module_spec)
+ return module_spec.loader.exec_module(module)
+
+
+class TestDocumentationNotebook(ExtTestCase):
+ def post_process(self, content):
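+        # nbconvert exports IPython magics as calls to get_ipython(),
+        # which fail outside IPython: comment those lines out before
+        # executing the exported script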
+ lines = []
+ for line in content.split("\n"):
+ if "get_ipython()" in line:
+ line = "# " + line
+ lines.append(line)
+ return "\n".join(lines)
+
+ def run_test(self, nb_name: str, verbose=0) -> int:
+ ppath = os.environ.get("PYTHONPATH", "")
+ if len(ppath) == 0:
+ os.environ["PYTHONPATH"] = ROOT
+ elif ROOT not in ppath:
+ sep = ";" if sys.platform == "win32" else ":"
+ os.environ["PYTHONPATH"] = ppath + sep + ROOT
+
+ perf = time.perf_counter()
+
+ exporter = PythonExporter()
+ content = self.post_process(exporter.from_filename(nb_name)[0])
+ bcontent = content.encode("utf-8")
+
+ tmp = "temp_notebooks"
+ if not os.path.exists(tmp):
+ os.mkdir(tmp)
+ # with tempfile.NamedTemporaryFile(suffix=".py") as tmp:
+ name = os.path.splitext(os.path.split(nb_name)[-1])[0]
+ if os.path.exists(tmp):
+ tmp_name = os.path.join(tmp, name + ".py")
+ self.assertEndsWith(tmp_name, ".py")
+ with open(tmp_name, "wb") as f:
+ f.write(bcontent)
+
+ fold, name = os.path.split(tmp_name)
+
+ try:
+ mod = import_source(fold, os.path.splitext(name)[0])
+ assert mod is not None
+ except (FileNotFoundError, RuntimeError):
+ # try another way
+ cmds = [sys.executable, "-u", tmp_name]
+ p = subprocess.Popen(
+ cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+ )
+ res = p.communicate()
+ out, err = res
+ st = err.decode("ascii", errors="ignore")
+ if "No such file or directory" in st:
+ raise FileNotFoundError(st) # noqa: B904
+ if len(st) > 0 and "Traceback" in st:
+ msg = (
+ f"Example {nb_name!r} (cmd: {cmds} - "
+ f"exec_prefix={sys.exec_prefix!r}) "
+ f"failed due to\n{st}"
+ )
+ if "CERTIFICATE_VERIFY_FAILED" in st and sys.platform == "win32":
+ warnings.warn(msg, stacklevel=0)
+ else:
+ raise AssertionError(msg) # noqa: B904
+
+ dt = time.perf_counter() - perf
+ if verbose:
+ print(f"{dt:.3f}: run {name!r}")
+ return 1
+
+ @classmethod
+ def add_test_methods_path(cls, fold):
+ found = os.listdir(fold)
+ last = os.path.split(fold)[-1]
+ for name in found:
+ if name.endswith(".ipynb"):
+ fullname = os.path.join(fold, name)
+ if "interro_rapide_" in name or (
+ sys.platform == "win32"
+ and (
+ "protobuf" in name
+ or "td_note_2021" in name
+ or "nb_pandas" in name
+ )
+ ):
+
+ @unittest.skip("notebook with questions or issues with windows")
+ def _test_(self, fullname=fullname):
+ res = self.run_test(fullname, verbose=VERBOSE)
+ self.assertIn(res, (-1, 1))
+
+ else:
+
+ def _test_(self, fullname=fullname):
+ res = self.run_test(fullname, verbose=VERBOSE)
+ self.assertIn(res, (-1, 1))
+
+ lasts = last.replace("-", "_")
+ names = os.path.splitext(name)[0].replace("-", "_")
+ setattr(cls, f"test_{lasts}_{names}", _test_)
+
+ @classmethod
+ def add_test_methods(cls):
+ this = os.path.abspath(os.path.dirname(__file__))
+ folds = [
+ os.path.join(this, "..", "..", "_doc", "notebooks", "dsgarden"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "image"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "metric"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "ml"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "nlp"),
+ ]
+ for fold in folds:
+ cls.add_test_methods_path(os.path.normpath(fold))
+
+
+TestDocumentationNotebook.add_test_methods()
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_measure_time.py b/_unittests/ut_xrun_doc/test_measure_time.py
new file mode 100644
index 00000000..84a4cfc9
--- /dev/null
+++ b/_unittests/ut_xrun_doc/test_measure_time.py
@@ -0,0 +1,14 @@
+import unittest
+from math import cos
+from mlstatpy.ext_test_case import ExtTestCase, measure_time
+
+
+class TestMeasureTime(ExtTestCase):
+ def test_measure_time(self):
+ res = measure_time(lambda: cos(5))
+ self.assertIsInstance(res, dict)
+ self.assertIn("average", res)
+
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_normalize_notebook.py b/_unittests/ut_xrun_doc/test_normalize_notebook.py
new file mode 100644
index 00000000..1cbfa7a8
--- /dev/null
+++ b/_unittests/ut_xrun_doc/test_normalize_notebook.py
@@ -0,0 +1,84 @@
+import unittest
+import os
+import pprint
+from nbformat import reader, writes
+from nbformat.validator import normalize
+from mlstatpy import __file__ as mlstatpy_file
+from mlstatpy.ext_test_case import ExtTestCase
+
+VERBOSE = 0
+ROOT = os.path.realpath(os.path.abspath(os.path.join(mlstatpy_file, "..", "..")))
+
+
+class TestDocumentationNotebook(ExtTestCase):
+ def post_process(self, content):
+ lines = []
+ for line in content.split("\n"):
+ if "get_ipython()" in line:
+ line = "# " + line
+ lines.append(line)
+ return "\n".join(lines)
+
+ def run_test(self, nb_name: str, verbose=0) -> int:
+ with open(nb_name, "r", encoding="utf-8") as f:
+ content = f.read()
+ if "sys.path.append" in content and "module_file_regex.ipynb" not in nb_name:
+ raise AssertionError(
+ f"'sys.path.append' was found in notebook {nb_name!r}."
+ )
+ nbdict = reader.reads(content)
+ new_dict = normalize(nbdict)
+ try:
+ new_content = writes(new_dict[1], version=4)
+ except AttributeError as e:
+ raise AssertionError(
+ f"Cannot convert {nb_name!r}\n----\n{pprint.pformat(nbdict)}"
+ f"\n-----\n{pprint.pformat(new_dict)}"
+ ) from e
+ if content != new_content:
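+            # the notebook on disk is not in normalized form: rewrite it
+            # in place only when NB_NORMALIZE is set, otherwise fail and
+            # ask the user to rerun with NB_NORMALIZE=1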
+ if os.environ.get("NB_NORMALIZE", 0) in (1, "1"):
+ if verbose:
+ print(f"[nbformat] normalize {nb_name!r}.")
+ with open(nb_name, "w", encoding="utf-8") as f:
+ f.write(new_content)
+ return 1
+ raise AssertionError(
+ f"Normalization should be run on {nb_name!r}. "
+ f"Set NB_NORMALIZE=1 and rerun this file."
+ )
+ return 1
+
+ @classmethod
+ def add_test_methods_path(cls, fold):
+ found = os.listdir(fold)
+ last = os.path.split(fold)[-1]
+ for name in found:
+ if name.endswith(".ipynb"):
+ fullname = os.path.join(fold, name)
+
+ def _test_(self, fullname=fullname):
+ res = self.run_test(fullname, verbose=VERBOSE)
+ self.assertIn(res, (-1, 1))
+
+ lasts = last.replace("-", "_")
+ names = os.path.splitext(name)[0].replace("-", "_")
+ setattr(cls, f"test_{lasts}_{names}", _test_)
+
+ @classmethod
+ def add_test_methods(cls):
+ this = os.path.abspath(os.path.dirname(__file__))
+ folds = [
+ os.path.join(this, "..", "..", "_doc", "notebooks", "dsgarden"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "image"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "metric"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "ml"),
+ os.path.join(this, "..", "..", "_doc", "notebooks", "nlp"),
+ ]
+ for fold in folds:
+ cls.add_test_methods_path(os.path.normpath(fold))
+
+
+TestDocumentationNotebook.add_test_methods()
+
+if __name__ == "__main__":
+ unittest.main(verbosity=2)
diff --git a/mlstatpy/__init__.py b/mlstatpy/__init__.py
index e28d0c2e..83e6236e 100644
--- a/mlstatpy/__init__.py
+++ b/mlstatpy/__init__.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
__version__ = "0.4.0"
__author__ = "Xavier Dupré"
__github__ = "https://github.com/sdpython/mlstatpy"
diff --git a/mlstatpy/data/data_exceptions.py b/mlstatpy/data/data_exceptions.py
index 522b871c..4ad1e7a6 100644
--- a/mlstatpy/data/data_exceptions.py
+++ b/mlstatpy/data/data_exceptions.py
@@ -2,5 +2,3 @@ class DataException(Exception):
"""
raised when retrieving data
"""
-
- pass
diff --git a/mlstatpy/data/wikipedia.py b/mlstatpy/data/wikipedia.py
index de4af5c8..c35caeed 100644
--- a/mlstatpy/data/wikipedia.py
+++ b/mlstatpy/data/wikipedia.py
@@ -1,5 +1,5 @@
import os
-from pyquickhelper.filehelper import get_url_content_timeout, ungzip_files
+from mlstatpy.ext_test_case import get_url_content_timeout, ungzip_files
from .data_exceptions import DataException
@@ -34,9 +34,7 @@ def download_pageviews(dt, folder=".", unzip=True, timeout=-1, overwrite=False):
os.remove(name)
if isinstance(names, list):
if len(names) != 1:
- raise DataException( # pragma: no cover
- f"Expecting only one file, not '{names}'"
- )
+ raise DataException(f"Expecting only one file, not '{names}'")
return names[0]
return names
return name
@@ -54,7 +52,7 @@ def download_dump(country, name, folder=".", unzip=True, timeout=-1, overwrite=False):
:param overwrite: overwrite
"""
url = "https://dumps.wikimedia.org/{0}wiki/latest/{0}wiki-{1}".format(country, name)
- file = url.split("/")[-1] # pylint: disable=C0207
+ file = url.split("/")[-1]
name = os.path.join(folder, file)
unzipname = os.path.splitext(name)[0]
if overwrite or (not os.path.exists(name) and not os.path.exists(unzipname)):
@@ -66,9 +64,7 @@ def download_dump(country, name, folder=".", unzip=True, timeout=-1, overwrite=F
os.remove(name)
if isinstance(names, list):
if len(names) != 1:
- raise DataException( # pragma: no cover
- f"Expecting only one file, not '{names}'"
- )
+ raise DataException(f"Expecting only one file, not '{names}'")
return names[0]
return names
return name[:-3] if name.endswith(".gz") else name
diff --git a/mlstatpy/ext_test_case.py b/mlstatpy/ext_test_case.py
new file mode 100644
index 00000000..380ed4be
--- /dev/null
+++ b/mlstatpy/ext_test_case.py
@@ -0,0 +1,796 @@
+import os
+import stat
+import sys
+import time
+import unittest
+import unicodedata
+import warnings
+from contextlib import redirect_stderr, redirect_stdout
+from io import StringIO, BytesIO
+from timeit import Timer
+from typing import Any, Callable, Dict, List, Optional, Union
+
+import numpy
+from numpy.testing import assert_allclose
+
+
+class InternetException(RuntimeError):
+ """
+    Exception raised by the function :func:`get_url_content_timeout`.
+ """
+
+
+def get_url_content_timeout(
+ url,
+ timeout=10,
+ output=None,
+ encoding="utf8",
+ raise_exception=True,
+ chunk=None,
+ fLOG=None,
+):
+ """
+    Downloads a file from the internet. By default, the content is assumed
+    to be text; set *encoding* to None to retrieve binary content.
+
+    :param url: (str) url
+    :param timeout: (int) in seconds; after this delay, the function
+        gives up and returns None, -1 means forever
+    :param output: (str) if not None, the content is stored in that file
+    :param encoding: (str) utf8 by default, but if it is None,
+        the returned information is binary
+    :param raise_exception: (bool) True to raise an exception,
+        False to issue a warning
+    :param chunk: (int|None) save data every *chunk* bytes
+        (only if output is not None)
+    :param fLOG: logging function (only applies when chunk is not None)
+ :return: content of the url
+
+ If the function automatically detects that the downloaded data is in gzip
+ format, it will decompress it.
+
+ The function raises the exception :class:`InternetException`.
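+
+    A minimal usage sketch (the URL is only an illustration), storing
+    a binary download in a local file::
+
+        get_url_content_timeout(
+            "https://dumps.wikimedia.org/frwiki/latest/"
+            "frwiki-latest-all-titles-in-ns0.gz",
+            timeout=60,
+            encoding=None,
+            output="titles.gz",
+        )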
+ """
+ import gzip
+ import socket
+ import urllib.error as urllib_error
+ import urllib.request as urllib_request
+ import http.client as http_client
+
+ try:
+ from http.client import InvalidURL
+ except ImportError:
+ InvalidURL = ValueError
+
+ def save_content(content, append=False):
+ "local function"
+ app = "a" if append else "w"
+ if encoding is not None:
+ with open(output, app, encoding=encoding) as f:
+ f.write(content)
+ else:
+ with open(output, app + "b") as f:
+ f.write(content)
+
+ try:
+ if chunk is not None:
+ if output is None:
+ raise ValueError("output cannot be None if chunk is not None")
+ app = [False]
+ size = [0]
+
+ def _local_loop(ur):
+ while True:
+ res = ur.read(chunk)
+ size[0] += len(res) # pylint: disable=E1137
+ if fLOG is not None:
+ fLOG("[get_url_content_timeout] downloaded", size, "bytes")
+ if len(res) > 0:
+ if encoding is not None:
+ res = res.decode(encoding=encoding)
+ save_content(res, app)
+ else:
+ break
+ app[0] = True # pylint: disable=E1137
+
+ if timeout != -1:
+ with urllib_request.urlopen(url, timeout=timeout) as ur:
+ _local_loop(ur)
+ else:
+ with urllib_request.urlopen(url) as ur:
+ _local_loop(ur)
+ app = app[0]
+ size = size[0]
+ else:
+ if timeout != -1:
+ with urllib_request.urlopen(url, timeout=timeout) as ur:
+ res = ur.read()
+ else:
+ with urllib_request.urlopen(url) as ur:
+ res = ur.read()
+ except (
+ urllib_error.HTTPError,
+ urllib_error.URLError,
+ ConnectionRefusedError,
+ socket.timeout,
+ ConnectionResetError,
+ http_client.BadStatusLine,
+ http_client.IncompleteRead,
+ ValueError,
+ InvalidURL,
+ ) as e:
+ if raise_exception:
+ raise InternetException(f"Unable to retrieve content url='{url}'") from e
+ warnings.warn(
+ f"Unable to retrieve content from '{url}' because of {e}",
+ ResourceWarning,
+ stacklevel=0,
+ )
+ return None
+ except Exception as e:
+ if raise_exception:
+ raise InternetException(
+ f"Unable to retrieve content, url='{url}', exc={e}"
+ ) from e
+ warnings.warn(
+ f"Unable to retrieve content from '{url}' "
+ f"because of unknown exception: {e}",
+ ResourceWarning,
+ stacklevel=0,
+ )
+ raise e
+
+ if chunk is None:
+ if len(res) >= 2 and res[:2] == b"\x1f\x8B":
+ # gzip format
+ res = gzip.decompress(res)
+
+ if encoding is not None:
+ try:
+ content = res.decode(encoding)
+ except UnicodeDecodeError as e:
+            # try other encodings
+
+ laste = [e]
+ othenc = ["iso-8859-1", "latin-1"]
+
+ for encode in othenc:
+ try:
+ content = res.decode(encode)
+ break
+ except UnicodeDecodeError as ee:
+ laste.append(ee)
+ content = None
+
+ if content is None:
+ mes = [f"Unable to parse text from '{url}'."]
+                    mes.append("tried: " + str([encoding, *othenc]))
+ mes.append("beginning:\n" + str([res])[:50])
+ for e in laste:
+ mes.append("Exception: " + str(e))
+ raise ValueError("\n".join(mes)) from e
+ else:
+ content = res
+ else:
+ content = None
+
+ if output is not None and chunk is None:
+ save_content(content)
+
+ return content
+
+
+def unit_test_going():
+ """
+    Returns True if the environment variable ``UNITTEST_GOING`` is set to 1,
+    telling a script it is being run as a unit test and should stay short.
+ """
+ going = int(os.environ.get("UNITTEST_GOING", 0))
+ return going == 1
+
+
+def ignore_warnings(warns: List[Warning]) -> Callable:
+ """
+ Catches warnings.
+
+ :param warns: warnings to ignore
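+
+    A minimal sketch of the intended use, as a decorator on a test method
+    (class and method names are only illustrations)::
+
+        class MyTest(ExtTestCase):
+            @ignore_warnings(DeprecationWarning)
+            def test_noisy(self):
+                ...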
+ """
+
+ def wrapper(fct):
+ if warns is None:
+ raise AssertionError(f"warns cannot be None for '{fct}'.")
+
+ def call_f(self):
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", warns)
+ return fct(self)
+
+ return call_f
+
+ return wrapper
+
+
+def measure_time(
+ stmt: Union[str, Callable],
+ context: Optional[Dict[str, Any]] = None,
+ repeat: int = 10,
+ number: int = 50,
+ warmup: int = 1,
+ div_by_number: bool = True,
+ max_time: Optional[float] = None,
+) -> Dict[str, Any]:
+ """
+ Measures a statement and returns the results as a dictionary.
+
+ :param stmt: string or callable
+    :param context: dictionary of variables the statement needs
+    :param repeat: average over *repeat* experiments
+    :param number: number of executions in one row
+    :param warmup: number of iterations to run before starting
+        the real measurement
+    :param div_by_number: divide by the number of executions
+    :param max_time: execute the statement until the total goes
+        beyond this time (approximately); *repeat* is ignored,
+        *div_by_number* must be set to True
+ :return: dictionary
+
+ .. runpython::
+ :showcode:
+
+ from onnx_extended.ext_test_case import measure_time
+ from math import cos
+
+ res = measure_time(lambda: cos(0.5))
+ print(res)
+
+    See `Timer.repeat
+    <https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat>`_
+ for a better understanding of parameter *repeat* and *number*.
+ The function returns a duration corresponding to
+ *number* times the execution of the main statement.
+ """
+ if not callable(stmt) and not isinstance(stmt, str):
+ raise TypeError(
+ f"stmt is not callable or a string but is of type {type(stmt)!r}."
+ )
+ if context is None:
+ context = {}
+
+ if isinstance(stmt, str):
+ tim = Timer(stmt, globals=context)
+ else:
+ tim = Timer(stmt)
+
+ if warmup > 0:
+ warmup_time = tim.timeit(warmup)
+ else:
+ warmup_time = 0
+
+ if max_time is not None:
+ if not div_by_number:
+ raise ValueError(
+                "div_by_number must be set to True if max_time is defined."
+ )
+ i = 1
+ total_time = 0
+ results = []
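+        # runs batches of i and 2 * i executions, then grows i according
+        # to the remaining time budget, until the accumulated time
+        # reaches max_time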
+ while True:
+ for j in (1, 2):
+ number = i * j
+ time_taken = tim.timeit(number)
+ results.append((number, time_taken))
+ total_time += time_taken
+ if total_time >= max_time:
+ break
+ if total_time >= max_time:
+ break
+ ratio = (max_time - total_time) / total_time
+ ratio = max(ratio, 1)
+ i = int(i * ratio)
+
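+        # aggregates the batches: mean and deviation are weighted by the
+        # number of executions in each batch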
+ res = numpy.array(results)
+ tw = res[:, 0].sum()
+ ttime = res[:, 1].sum()
+ mean = ttime / tw
+ ave = res[:, 1] / res[:, 0]
+ dev = (((ave - mean) ** 2 * res[:, 0]).sum() / tw) ** 0.5
+ mes = dict(
+ average=mean,
+ deviation=dev,
+ min_exec=numpy.min(ave),
+ max_exec=numpy.max(ave),
+ repeat=1,
+ number=tw,
+ ttime=ttime,
+ )
+ else:
+ res = numpy.array(tim.repeat(repeat=repeat, number=number))
+ if div_by_number:
+ res /= number
+
+ mean = numpy.mean(res)
+ dev = numpy.mean(res**2)
+ dev = (dev - mean**2) ** 0.5
+ mes = dict(
+ average=mean,
+ deviation=dev,
+ min_exec=numpy.min(res),
+ max_exec=numpy.max(res),
+ repeat=repeat,
+ number=number,
+ ttime=res.sum(),
+ )
+
+ if "values" in context:
+ if hasattr(context["values"], "shape"):
+ mes["size"] = context["values"].shape[0]
+ else:
+ mes["size"] = len(context["values"])
+ else:
+ mes["context_size"] = sys.getsizeof(context)
+ mes["warmup_time"] = warmup_time
+ return mes
+
+
+class ExtTestCase(unittest.TestCase):
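+    # (name, line, warning) triples collected by test classes,
+    # reported once in tearDownClass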
+ _warns = []
+
+ def assertEndsWith(self, string, suffix):
+ if not string.endswith(suffix):
+ raise AssertionError(f"{string!r} does not end with {suffix!r}.")
+
+ def assertExists(self, name):
+ if not os.path.exists(name):
+            raise AssertionError(f"File or folder {name!r} does not exist.")
+
+ def assertEqual(self, *args, **kwargs):
+ if isinstance(args[0], numpy.ndarray):
+ self.assertEqualArray(*args, **kwargs)
+ else:
+ super().assertEqual(*args, **kwargs)
+
+ def assertNotEqualArray(
+ self,
+ expected: numpy.ndarray,
+ value: numpy.ndarray,
+ atol: float = 0,
+ rtol: float = 0,
+ ):
+ try:
+ self.assertEqualArray(expected, value, atol=atol, rtol=rtol)
+ except AssertionError:
+ return
+ raise AssertionError("Both arrays are equal.")
+
+ def assertEqualArray(
+ self,
+ expected: numpy.ndarray,
+ value: numpy.ndarray,
+ atol: float = 0,
+ rtol: float = 0,
+ ):
+ self.assertEqual(expected.dtype, value.dtype)
+ self.assertEqual(expected.shape, value.shape)
+ assert_allclose(expected, value, atol=atol, rtol=rtol)
+
+ def assertAlmostEqual(
+ self,
+ expected: numpy.ndarray,
+ value: numpy.ndarray,
+ atol: float = 0,
+ rtol: float = 0,
+ ):
+ if not isinstance(expected, numpy.ndarray):
+ expected = numpy.array(expected)
+ if not isinstance(value, numpy.ndarray):
+ value = numpy.array(value).astype(expected.dtype)
+ self.assertEqualArray(expected, value, atol=atol, rtol=rtol)
+
+ def assertRaise(self, fct: Callable, exc_type: Optional[Exception] = None):
+ exct = exc_type or Exception
+ try:
+ fct()
+ except exct as e:
+ if exc_type is not None and not isinstance(e, exc_type):
+ raise AssertionError(f"Unexpected exception {type(e)!r}.") # noqa: B904
+ return
+ raise AssertionError("No exception was raised.")
+
+ def assertEmpty(self, value: Any):
+ if value is None:
+ return
+ if len(value) == 0:
+ return
+ raise AssertionError(f"value is not empty: {value!r}.")
+
+ def assertNotEmpty(self, value: Any):
+ if value is None:
+ raise AssertionError(f"value is empty: {value!r}.")
+ if isinstance(value, (list, dict, tuple, set)):
+ if len(value) == 0:
+ raise AssertionError(f"value is empty: {value!r}.")
+
+ def assertStartsWith(self, prefix: str, full: str):
+ if not full.startswith(prefix):
+            raise AssertionError(f"{full!r} does not start with prefix {prefix!r}.")
+
+ def assertGreater(self, a, b):
+ if a < b:
+ raise AssertionError(f"{a} < {b}")
+
+ def assertLesser(self, a, b):
+ if a > b:
+ raise AssertionError(f"{a} > {b}")
+
+ @classmethod
+ def tearDownClass(cls):
+ for name, line, w in cls._warns:
+ warnings.warn(f"\n{name}:{line}: {type(w)}\n {str(w)}", stacklevel=0)
+
+ def capture(self, fct: Callable):
+ """
+ Runs a function and capture standard output and error.
+
+ :param fct: function to run
+ :return: result of *fct*, output, error
+ """
+ sout = StringIO()
+ serr = StringIO()
+ with redirect_stdout(sout), redirect_stderr(serr):
+ res = fct()
+ return res, sout.getvalue(), serr.getvalue()
+
+ @staticmethod
+ def profile(fct, sort="cumulative", rootrem=None, return_results=False):
+ """
+        Profiles the execution of a function with
+        :func:`onnx_array_api.profiling.profile`.
+
+ :param fct: function to profile
+ :param sort: see :meth:`pstats.Stats.sort_stats`
+ :param rootrem: root to remove in filenames
+ :param return_results: return the results as well
+ :return: statistics text dump
+ """
+ from onnx_array_api.profiling import profile
+
+ return profile(fct, sort=sort, rootrem=rootrem, return_results=return_results)
+
+
+def remove_folder(top, remove_also_top=True, raise_exception=True):
+ """
+ Removes everything in folder *top*.
+
+ :param top: path to remove
+ :param remove_also_top: remove also root
+    :param raise_exception: raise an exception if a file cannot be removed
+    :return: list of removed files and folders
+        as tuples ``(name, "file" or "dir")``
+ """
+ if top in {"", "C:", "c:", "C:\\", "c:\\", "d:", "D:", "D:\\", "d:\\"}:
+ raise RuntimeError( # pragma: no cover
+ "top is a root (c: for example), this is not safe"
+ )
+
+ res = []
+ first_root = None
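+    # walks the tree bottom-up (topdown=False) so that files are removed
+    # before the folders which contain them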
+ for root, dirs, files in os.walk(top, topdown=False):
+ for name in files:
+ t = os.path.join(root, name)
+ try:
+ os.remove(t)
+ except PermissionError as e: # pragma: no cover
+ if raise_exception:
+ raise PermissionError(f"unable to remove file {t}") from e
+ remove_also_top = False
+ continue
+ res.append((t, "file"))
+ for name in dirs:
+ t = os.path.join(root, name)
+ try:
+ os.rmdir(t)
+ except OSError as e:
+ if raise_exception:
+ raise OSError(f"unable to remove folder {t}") from e
+ remove_also_top = False # pragma: no cover
+ continue # pragma: no cover
+ res.append((t, "dir"))
+ if first_root is None:
+ first_root = root
+
+ if top is not None and remove_also_top:
+ res.append((top, "dir"))
+ os.rmdir(top)
+
+ return res
+
+
+def get_temp_folder(
+ thisfile, name=None, clean=True, create=True, persistent=False, path_name="tpath"
+):
+ """
+ Creates and returns a local temporary folder to store files
+ when unit testing.
+
+ :param thisfile: use ``__file__`` or the function which runs the test
+ :param name: name of the temporary folder
+    :param clean: if True, clean the folder first, it can also be a function
+        called to determine whether or not the folder should be cleaned
+    :param create: if True, creates it (empty if clean is True)
+    :param persistent: if True, creates a folder at root level to reduce
+        the path length, the function checks the ``MAX_PATH`` constant and
+        shortens the test folder path on :epkg:`Windows`,
+        on :epkg:`Linux`, it creates a folder a few levels above the test file
+    :param path_name: test path used when *persistent* is True
+ :return: temporary folder
+
+    The function extracts the file which runs this test and names
+    the temporary folder after the test method when *name* is None.
+
+ Parameter *clean* can be a function.
+ Signature is ``def clean(folder)``.
+ """
+ if name is None:
+ name = thisfile.__name__
+ if name.startswith("test_"):
+ name = "temp_" + name[5:]
+ elif not name.startswith("temp_"):
+ name = "temp_" + name
+ thisfile = os.path.abspath(thisfile.__func__.__code__.co_filename)
+ final = os.path.split(name)[-1]
+
+ if not final.startswith("temp_") and not final.startswith("temp2_"):
+ raise NameError(f"the folder '{name}' must begin with temp_")
+
+ local = os.path.join(
+ os.path.normpath(os.path.abspath(os.path.dirname(thisfile))), name
+ )
+
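+    # a persistent folder is created closer to the root of the drive
+    # to keep paths short (MAX_PATH workaround on Windows)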
+ if persistent:
+ if sys.platform.startswith("win"):
+ from ctypes.wintypes import MAX_PATH
+
+ if MAX_PATH <= 300:
+ local = os.path.join(os.path.abspath("\\" + path_name), name)
+ else:
+ local = os.path.join(local, "..", "..", "..", "..", path_name, name)
+ else:
+ local = os.path.join(local, "..", "..", "..", "..", path_name, name)
+ local = os.path.normpath(local)
+
+ if name == local:
+ raise NameError(f"The folder '{name}' must be relative, not absolute")
+
+ if not os.path.exists(local):
+ if create:
+ os.makedirs(local)
+ mode = os.stat(local).st_mode
+ nmode = mode | stat.S_IWRITE
+ if nmode != mode:
+ os.chmod(local, nmode)
+ else:
+ if (callable(clean) and clean(local)) or (not callable(clean) and clean):
+            remove_folder(local)
+ time.sleep(0.1)
+ if create and not os.path.exists(local):
+ os.makedirs(local)
+ mode = os.stat(local).st_mode
+ nmode = mode | stat.S_IWRITE
+ if nmode != mode:
+ os.chmod(local, nmode)
+
+ return local
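+
+
+# Typical use inside a test (sketch, the folder name must start
+# with 'temp_'):
+#   temp = get_temp_folder(__file__, "temp_example")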
+
+
+def noLOG(*args, **kwargs):
+ pass
+
+
+def unzip_files(
+ zipf, where_to=None, fLOG=noLOG, fvalid=None, remove_space=True, fail_if_error=True
+):
+ """
+ Unzips files from a zip archive.
+
+ :param zipf: archive (or bytes or BytesIO)
+ :param where_to: destination folder (can be None, the result is a list of tuple)
+ :param fLOG: logging function
+    :param fvalid: function which takes two paths (zip name, local name)
+        and returns True if the file
+        must be unzipped, False otherwise; if None, the default answer is True
+    :param remove_space: remove spaces in created local path (+ ``',()``)
+    :param fail_if_error: fails if an error is encountered
+        (typically a weird character in a filename),
+        otherwise a warning is issued.
+ :return: list of unzipped files
+ """
+ import zipfile
+
+ if isinstance(zipf, bytes):
+ zipf = BytesIO(zipf)
+
+ try:
+ with zipfile.ZipFile(zipf, "r"):
+ pass
+ except zipfile.BadZipFile as e: # pragma: no cover
+ if isinstance(zipf, BytesIO):
+ raise e
+ raise OSError(f"Unable to read file '{zipf}'") from e
+
+ files = []
+ with zipfile.ZipFile(zipf, "r") as file:
+ for info in file.infolist():
+ if fLOG:
+ fLOG(f"[unzip_files] unzip '{info.filename}'")
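+            # with where_to=None, nothing is written to disk: the function
+            # returns (filename, content) tuples instead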
+ if where_to is None:
+ try:
+ content = file.read(info.filename)
+ except zipfile.BadZipFile as e: # pragma: no cover
+ if fail_if_error:
+ raise zipfile.BadZipFile(
+ f"Unable to extract '{info.filename}' due to {e}"
+ ) from e
+ warnings.warn(
+ f"Unable to extract '{info.filename}' due to {e}",
+ UserWarning,
+ stacklevel=0,
+ )
+ continue
+ files.append((info.filename, content))
+ else:
+ clean = remove_diacritics(info.filename)
+ if remove_space:
+ clean = (
+ clean.replace(" ", "")
+ .replace("'", "")
+ .replace(",", "_")
+ .replace("(", "_")
+ .replace(")", "_")
+ )
+ tos = os.path.join(where_to, clean)
+ if not os.path.exists(tos):
+ if fvalid and not fvalid(info.filename, tos):
+ fLOG("[unzip_files] skipping", info.filename)
+ continue
+ try:
+ data = file.read(info.filename)
+ except zipfile.BadZipFile as e: # pragma: no cover
+ if fail_if_error:
+ raise zipfile.BadZipFile(
+ f"Unable to extract '{info.filename}' due to {e}"
+ ) from e
+ warnings.warn(
+ f"Unable to extract '{info.filename}' due to {e}",
+ UserWarning,
+ stacklevel=0,
+ )
+ continue
+ # check encoding to avoid characters not allowed in paths
+ if not os.path.exists(tos):
+ if sys.platform.startswith("win"):
+ tos = tos.replace("/", "\\")
+ finalfolder = os.path.split(tos)[0]
+ if not os.path.exists(finalfolder):
+ fLOG(
+ "[unzip_files] creating folder (zip)",
+ os.path.abspath(finalfolder),
+ )
+ try:
+ os.makedirs(finalfolder)
+ except FileNotFoundError as e: # pragma: no cover
+ mes = (
+ "Unexpected error\ninfo.filename={0}\ntos={1}"
+                                "\nfinalfolder={2}\nlen(finalfolder)={3}"
+ ).format(
+ info.filename, tos, finalfolder, len(finalfolder)
+ )
+ raise FileNotFoundError(mes) from e
+ if not info.filename.endswith("/"):
+ try:
+ with open(tos, "wb") as u:
+ u.write(data)
+ except FileNotFoundError as e: # pragma: no cover
+ # probably an issue in the path name
+ # the next lines are just here to distinguish
+ # between the two cases
+ if not os.path.exists(finalfolder):
+ raise e
+ newname = info.filename.replace(" ", "_").replace(
+ ",", "_"
+ )
+ if sys.platform.startswith("win"):
+ newname = newname.replace("/", "\\")
+ tos = os.path.join(where_to, newname)
+ finalfolder = os.path.split(tos)[0]
+ if not os.path.exists(finalfolder):
+ fLOG(
+ "[unzip_files] creating folder (zip)",
+ os.path.abspath(finalfolder),
+ )
+ os.makedirs(finalfolder)
+ with open(tos, "wb") as u:
+ u.write(data)
+ files.append(tos)
+ fLOG(
+ "[unzip_files] unzipped ", info.filename, " to ", tos
+ )
+ elif not tos.endswith("/"): # pragma: no cover
+ files.append(tos)
+ elif not info.filename.endswith("/"): # pragma: no cover
+ files.append(tos)
+ return files
+
+
+def ungzip_files(
+ filename,
+ where_to=None,
+ fLOG=noLOG,
+ fvalid=None,
+ remove_space=True,
+ unzip=True,
+ encoding=None,
+):
+ """
+ Uncompresses files from a gzip file.
+
+    :param filename: final gzip file (double compression, extension
+        should be something like ``.zip.gz``)
+    :param where_to: destination folder (can be None, the result is a list of tuple)
+    :param fLOG: logging function
+    :param fvalid: function which takes two paths (zip name, local name)
+        and returns True if the file
+        must be unzipped, False otherwise; if None, the default answer is True
+ :param remove_space: remove spaces in created local path (+ ``',()``)
+ :param unzip: unzip file after gzip
+ :param encoding: encoding
+ :return: list of unzipped files
+ """
+ import gzip
+
+ if isinstance(filename, bytes):
+ is_file = False
+ filename = BytesIO(filename)
+ else:
+ is_file = True
+
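+    # the archive may be doubly compressed (e.g. ``.zip.gz``): gunzip
+    # first, then optionally unzip the result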
+    if encoding is None:
+        with gzip.open(filename, "rb") as f:
+            content = f.read()
+ if unzip:
+ try:
+ return unzip_files(content, where_to=where_to, fLOG=fLOG)
+ except Exception as e:
+ raise OSError(f"Unable to unzip file '{filename}'") from e
+ elif where_to is not None:
+ filename = os.path.split(filename)[-1].replace(".gz", "")
+ filename = os.path.join(where_to, filename)
+ with open(filename, "wb") as f:
+ f.write(content)
+ return filename
+ return content
+    else:
+        # text mode: decode with the requested encoding while uncompressing
+        with gzip.open(filename, "rt", encoding=encoding) as f:
+            content = f.read()
+        if is_file:
+            filename = filename.replace(".gz", "")
+            # content is a string here, write it back as text
+            with open(filename, "w", encoding=encoding) as f:
+                f.write(content)
+            return filename
+        return content
+
+
+def remove_diacritics(input_str):
+ """
+ Removes diacritics.
+
+ :param input_str: string to clean
+ :return: cleaned string
+
+ Example::
+
+ enguérand --> enguerand
+ """
+ nkfd_form = unicodedata.normalize("NFKD", input_str)
+ only_ascii = nkfd_form.encode("ASCII", "ignore")
+ return only_ascii.decode("utf8")
diff --git a/mlstatpy/garden/poulet.py b/mlstatpy/garden/poulet.py
index fa5b6c72..934cf89c 100644
--- a/mlstatpy/garden/poulet.py
+++ b/mlstatpy/garden/poulet.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import math
import random
@@ -53,7 +51,7 @@ def esperance(X, p, q, s, lx):
:return: espérance du profit
"""
res = 0.0
- for i in range(0, lx * 2):
+ for i in range(lx * 2):
res += profit(float(i), X, p, q, s) * proba_poisson(lx, i)
return res
@@ -70,7 +68,7 @@ def maximum(p, q, s, lx):
:return: liste ``(X, profit)``
"""
res = []
- for X in range(0, 2 * lx):
+ for X in range(2 * lx):
r = esperance(X, p, q, s, lx)
res.append((X, r))
return res
@@ -137,13 +135,13 @@ def histogramme_poisson_melange(params, coef, n=100000):
à la loi de paramètre ``params[i]``
:return: histogramme
"""
- h = [0.0 for i in range(0, 4 * max(params))]
- for i in range(0, n):
+ h = [0.0 for i in range(4 * max(params))]
+ for _i in range(n):
x = poisson_melange(params, coef)
if x < len(h):
h[x] += 1
s = sum(h)
- for i in range(0, len(h)):
+ for i in range(len(h)):
h[i] = float(h[i]) / s
return h
@@ -171,7 +169,7 @@ def local_proba_poisson_melange(params, coef, i):
histogramme_poisson_melange(params, coef)
)
if i >= len(proba_poisson_melange_tableau):
- return 0.0 # pragma: no cover
+ return 0.0
return proba_poisson_melange_tableau[i]
return local_proba_poisson_melange
diff --git a/mlstatpy/graph/graph_distance.py b/mlstatpy/graph/graph_distance.py
index 438425b6..e8d5e8a0 100644
--- a/mlstatpy/graph/graph_distance.py
+++ b/mlstatpy/graph/graph_distance.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import copy
import re
@@ -68,7 +66,7 @@ class _Edge:
def __init__(self, from_, to, label, weight):
self.from_, self.to = from_, to
- self.nb = from_, to # pylint: disable=E1101
+ self.nb = from_, to
self.label = label
@@ -90,15 +88,15 @@ def __init__(self, from_, to, label, weight):
self.pair = (None, None)
self.weight = weight
if self.from_ == "00" and self.to == "00":
- raise AssertionError("should not happen") # pragma: no cover
+ raise AssertionError("should not happen")
if self.from_ == "11" and self.to == "11":
- raise AssertionError("should not happen") # pragma: no cover
+ raise AssertionError("should not happen")
def __str__(self):
"""
usual
"""
- return f"{self.nb[0]} -> {self.nb[1]} [{self.Label}]" # pylint: disable=E1101
+ return f"{self.nb[0]} -> {self.nb[1]} [{self.Label}]"
def __repr__(self):
"""
@@ -106,10 +104,10 @@ def __repr__(self):
"""
return "Edge({}, {}, {}, {})".format(
repr(self.nb[0]),
- repr(self.nb[1]), # pylint: disable=E1101
+ repr(self.nb[1]),
repr(self.Label),
self.weight,
- ) # pylint: disable=E1101
+ )
def is_vertex(self):
"""
@@ -219,7 +217,7 @@ def __getitem__(self, index):
return self.vertices[index]
if isinstance(index, tuple):
return self.edges[index]
- raise KeyError("unable to get element " + str(index)) # pragma: no cover
+ raise KeyError("unable to get element " + str(index))
@staticmethod
def load_from_file(filename, add_loop):
@@ -228,11 +226,11 @@ def load_from_file(filename, add_loop):
@param filename file name
@param add_loop @see me __init__
"""
- lines = open(filename, "r").readlines() # pylint: disable=R1732,W1514
+ with open(filename, "r") as f:
+ lines = f.readlines()
regV = re.compile('\\"?([a-z0-9_]+)\\"? *[[]label=\\"(.*)\\"[]]')
regE = re.compile(
- '\\"?([a-z0-9_]+)\\"? *-> *\\"?'
- + '([a-z0-9_]+)\\"? *[[]label=\\"(.*)\\"[]]'
+ '\\"?([a-z0-9_]+)\\"? *-> *\\"?([a-z0-9_]+)\\"? *[[]label=\\"(.*)\\"[]]'
)
edge_list = []
vertex_label = {}
@@ -247,7 +245,7 @@ def load_from_file(filename, add_loop):
g = ve.groups()
vertex_label[g[0]] = g[1]
if not vertex_label or not edge_list:
- raise OSError(f"Unable to parse file {filename!r}.") # pragma: no cover
+ raise OSError(f"Unable to parse file {filename!r}.")
return GraphDistance(edge_list, vertex_label, add_loop)
def _private__init__(self, add_loop, weight_vertex, weight_edge):
@@ -312,7 +310,7 @@ def __str__(self):
li = []
for v in self.vertices.values():
li.append(str(v))
- for k, e in self.edges.items():
+ for _k, e in self.edges.items():
li.append(str(e))
return "\n".join(li)
@@ -422,19 +420,19 @@ def tempF1_vertex(v1, v2, g1, g2, w1, w2):
if v2 is None:
return 0.0
if not v2.is_vertex():
- raise TypeError("v2 should be a vertex") # pragma: no cover
+ raise TypeError("v2 should be a vertex")
return v2.weight * w2
elif v2 is None:
if not v1.is_vertex():
- raise TypeError("v1 should be a vertex") # pragma: no cover
+ raise TypeError("v1 should be a vertex")
if not v1.is_vertex():
- raise TypeError("v1 should be a vertex") # pragma: no cover
+ raise TypeError("v1 should be a vertex")
return v1.weight * w1
else:
if not v1.is_vertex():
- raise TypeError("v1 should be a vertex") # pragma: no cover
+ raise TypeError("v1 should be a vertex")
if not v2.is_vertex():
- raise TypeError("v2 should be a vertex") # pragma: no cover
+ raise TypeError("v2 should be a vertex")
return (
0
if v1.label == v2.label
@@ -508,7 +506,7 @@ def common_paths(
while modif > 0:
modif = 0
add = {}
- for k, v in g.vertices.items():
+ for _k, v in g.vertices.items():
v1, v2 = v.pair
if not v.succE:
for e1 in v1.succE:
@@ -625,8 +623,7 @@ def enumerate_all_paths(self, edges_and_vertices, begin=None):
if v.label == self.labelEnd:
yield path
else:
- for p in self.enumerate_all_paths(edges_and_vertices, path):
- yield p
+ yield from self.enumerate_all_paths(edges_and_vertices, path)
def edit_distance_path(
self,
@@ -742,7 +739,7 @@ def cache_cost(func, a, b, g1, g2, w1, w2):
def private_count_left_right(self, valuesInList):
countLeft = {}
countRight = {}
- for k, v in valuesInList:
+ for _k, v in valuesInList:
i, j = v
if i not in countRight:
countRight[i] = {}
@@ -886,7 +883,7 @@ def distance_matching_graphs_paths(
print("[distance_matching_graphs_paths] pair_count_vertex")
pair_count_edge = {}
pair_count_vertex = {}
- for k, v in reduction:
+ for _k, v in reduction:
path = matrix_distance[v][1]
for el in path:
n1, n2 = el[2]
@@ -935,7 +932,7 @@ def distance_matching_graphs_paths(
newv = Vertex(v.nb, v.label, weight_vertex)
res_graph.vertices[k] = newv
if v.nb in count_vertex_right:
- ind = list(count_vertex_right[v.nb].keys())[0]
+ ind = list(count_vertex_right[v.nb].keys())[0] # noqa: RUF015
newv.pair = (v, graph2.vertices[ind])
doneVertex[ind] = newv
if newv.pair[0].label != newv.pair[1].label:
@@ -954,7 +951,7 @@ def distance_matching_graphs_paths(
newe = Edge(e.from_, e.to, e.label, weight_edge)
res_graph.edges[k] = newe
if e.nb in count_edge_right:
- ind = list(count_edge_right[e.nb].keys())[0]
+ ind = list(count_edge_right[e.nb].keys())[0] # noqa: RUF015
newe.pair = (e, graph2.edges[ind])
done_edge[ind] = newe
else:
@@ -964,21 +961,21 @@ def distance_matching_graphs_paths(
if k in done_edge:
continue
from_ = (
- list(count_vertex_left[e.from_].keys())[0]
+ list(count_vertex_left[e.from_].keys())[0] # noqa: RUF015
if e.from_ in count_vertex_left
else f"2a.{e.from_}"
)
to = (
- list(count_vertex_left[e.to].keys())[0]
+ list(count_vertex_left[e.to].keys())[0] # noqa: RUF015
if e.to in count_vertex_left
else f"2a.{e.to}"
)
if from_ not in res_graph.vertices:
- raise RuntimeError("should not happen " + from_) # pragma: no cover
+ raise RuntimeError("should not happen " + from_)
if to not in res_graph.vertices:
- raise RuntimeError("should not happen " + to) # pragma: no cover
+ raise RuntimeError("should not happen " + to)
newe = Edge(from_, to, e.label, weight_edge)
- res_graph.edges[newe.nb] = newe # pylint: disable=E1101
+ res_graph.edges[newe.nb] = newe
newe.pair = (None, e)
if verbose > 0:
@@ -1011,9 +1008,9 @@ def draw_vertices_edges(self):
elif v.pair[0] is None:
vertices.append((k, "+" + v.label, "green"))
else:
- raise RuntimeError("?") # pragma: no cover
+ raise RuntimeError("?")
- for k, v in self.edges.items():
+ for _k, v in self.edges.items():
if v.pair == (None, None) or (
v.pair[0] is not None and v.pair[1] is not None
):
@@ -1023,6 +1020,6 @@ def draw_vertices_edges(self):
elif v.pair[0] is None:
edges.append((v.from_, v.to, "+" + v.label, "green"))
else:
- raise RuntimeError("?") # pragma: no cover
+ raise RuntimeError("?")
return vertices, edges
diff --git a/mlstatpy/graph/graphviz_helper.py b/mlstatpy/graph/graphviz_helper.py
deleted file mode 100644
index deba3121..00000000
--- a/mlstatpy/graph/graphviz_helper.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import os
-import sys
-from pyquickhelper.loghelper import run_cmd
-from pyquickhelper.helpgen.conf_path_tools import find_graphviz_dot
-
-
-def run_graphviz(filename, image, engine="dot"):
- """
- Run :epkg:`GraphViz`.
-
- @param filename filename which contains the graph definition
- @param image output image
- @param engine *dot* or *neato*
- @return output of graphviz
- """
- ext = os.path.splitext(image)[-1]
- if ext != ".png":
- raise RuntimeError("extension should be .png not " + str(ext))
- if sys.platform.startswith("win"):
- bin_ = os.path.dirname(find_graphviz_dot())
- # if bin not in os.environ["PATH"]:
- # os.environ["PATH"] = os.environ["PATH"] + ";" + bin
- cmd = f'"{bin_}\\{engine}" -Tpng "{filename}" -o "{image}"'
- else:
- cmd = f'"{engine}" -Tpng "{filename}" -o "{image}"'
- out, err = run_cmd(cmd, wait=True)
- if err:
- raise RuntimeError(
- f"Unable to run Graphviz\nCMD:\n{cmd}\nOUT:\n{out}\nERR:\n{err}"
- )
- return out
-
-
-def edges2gv(vertices, edges):
- """
- Converts a graph into a :epkg:`GraphViz` file format.
-
- @param edges see below
- @param vertices see below
- @return gv format
-
- The function creates a file ``.gv``.
-
- .. runpython::
- :showcode:
-
- from mlstatpy.graph.graphviz_helper import edges2gv
- gv = edges2gv([(1, "eee", "red")],
- [(1, 2, "blue"), (3, 4), (1, 3)])
- print(gv)
-
- """
- memovertex = {}
- for v in vertices:
- if isinstance(v, tuple):
- if len(v) == 1:
- memovertex[v[0]] = None
- else:
- memovertex[v[0]] = v[1:]
- else:
- memovertex[v] = None
- for edge in edges:
- i, j = edge[:2]
- if i not in memovertex:
- memovertex[i] = None
- if j not in memovertex:
- memovertex[j] = None
-
- li = ["digraph{"]
- for k, v in memovertex.items():
- if v is None:
- li.append(f"{k} ;")
- elif len(v) == 1:
- li.append(f'"{k}" [label="{v[0]}"];')
- elif len(v) == 2:
- li.append(f'"{k}" [label="{v[0]}",fillcolor={v[1]},color={v[1]}];')
- else:
- raise ValueError("unable to understand " + str(v))
-
- for edge in edges:
- i, j = edge[:2]
- if len(edge) == 2:
- li.append(f'"{i}" -> "{j}";')
- elif len(edge) == 3:
- li.append(f'"{i}" -> "{j}" [label="{edge[2]}"];')
- elif len(edge) == 4:
- li.append(f'"{i}" -> "{j}" [label="{edge[2]}",color={edge[3]}];')
- else:
- raise ValueError("unable to understand " + str(edge))
- li.append("}")
-
- text = "\n".join(li)
- return text
-
-
-def draw_graph_graphviz(vertices, edges, image=None, engine="dot"):
- """
- Draws a graph using :epkg:`Graphviz`.
-
- @param edges see below
- @param vertices see below
- @param image output image, None, just returns the output
- @param engine *dot* or *neato*
- @return :epkg:`Graphviz` output or
- the dot text if *image* is None
-
- The function creates a file ``.gv`` if *image* is not None.
- ::
-
- edges = [ (1,2, label, color), (3,4), (1,3), ... ] , liste d'arcs
- vertices = [ (1, label, color), (2), ... ] , liste de noeuds
- image = nom d'image (format png)
-
- """
- text = edges2gv(vertices, edges)
- if image is None:
- return text
- filename = image + ".gv"
- with open(filename, "w", encoding="utf-8") as f:
- f.write(text)
-
- out = run_graphviz(filename, image, engine=engine)
- if not os.path.exists(image):
- raise FileNotFoundError(f"GraphViz failed with no reason. '{image}' not found.")
- return out
diff --git a/mlstatpy/image/detection_segment/detection_nfa.py b/mlstatpy/image/detection_segment/detection_nfa.py
index 485b5280..9a1410c0 100644
--- a/mlstatpy/image/detection_segment/detection_nfa.py
+++ b/mlstatpy/image/detection_segment/detection_nfa.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
from .geometrie import Segment
@@ -92,7 +90,7 @@ def extremite(self):
"""
ext = []
if self.has_aligned_point():
- for i in range(0, len(self)):
+ for i in range(len(self)):
if self.info_ligne[i].aligne and (
i == 0
or i == len(self) - 1
diff --git a/mlstatpy/image/detection_segment/detection_segment.py b/mlstatpy/image/detection_segment/detection_segment.py
index 5c36f84a..08c68106 100644
--- a/mlstatpy/image/detection_segment/detection_segment.py
+++ b/mlstatpy/image/detection_segment/detection_segment.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import math
import copy
import time
@@ -70,20 +68,20 @@ def _load_image(img, format="PIL", mode=None):
elif len(img.shape) == 3:
gray = img.shape[0] * img.shape[1] * img.shape[2] - d1 * d0
else:
- raise ValueError(f"Unexpected shape {img.shape}") # pragma: no cover
+ raise ValueError(f"Unexpected shape {img.shape}")
if gray == 0:
img = img.reshape((d1, d0))
else:
img = img.reshape((d1, d0, 3))
return img
- raise ValueError(f"Unexpected value for fomat: '{format}'") # pragma: no cover
+        raise ValueError(f"Unexpected value for format: '{format}'")
if isinstance(img, numpy.ndarray):
if format == "array":
return img
if format == "PIL":
return Image.fromarray(img, mode=mode)
- raise ValueError(f"Unexpected value for fomat: '{format}'") # pragma: no cover
- raise TypeError(f"numpy array expected not {type(img)}") # pragma: no cover
+        raise ValueError(f"Unexpected value for format: '{format}'")
+ raise TypeError(f"numpy array expected not {type(img)}")
def compute_gradient(img, color=None):
@@ -118,7 +116,7 @@ def _calcule_gradient(img, color=None):
dy1 = img[1:-1, :] - img[:-2, :]
dy2 = img[2:, :] - img[1:-1, :]
dy = (dy1 + dy2) / 2
- res = numpy.zeros(img.shape + (2,))
+ res = numpy.zeros((*img.shape, 2))
res[:, 1:-1, 0] = dx
res[1:-1, :, 1] = dy
return res
@@ -136,8 +134,8 @@ def plot_gradient(image, gradient, more=None, direction=-1):
image = ImageDraw.Draw(image_)
X, Y = image_.size
if direction != -1:
- for x in range(0, X - 1):
- for y in range(0, Y - 1):
+ for x in range(X - 1):
+ for y in range(Y - 1):
n = gradient[y, x]
if more is None:
v = int((n[0] ** 2 + n[1] ** 2) ** 0.5 + 0.5)
@@ -150,8 +148,8 @@ def plot_gradient(image, gradient, more=None, direction=-1):
pass
elif direction > 0:
# on dessine des petits gradients dans l'image
- for x in range(0, X, direction):
- for y in range(0, Y, direction):
+        for x in range(0, X, direction):
+            for y in range(0, Y, direction):
n = gradient[y, x]
t = (n[0] ** 2 + n[1] ** 2) ** 0.5
if t == 0:
@@ -167,8 +165,8 @@ def plot_gradient(image, gradient, more=None, direction=-1):
elif direction == -2:
# derniere solution, la couleur represente l'orientation
# en chaque point de l'image
- for x in range(0, X):
- for y in range(0, Y):
+ for x in range(X):
+ for y in range(Y):
n = gradient[y, x]
i = int(-n[0] * 10 + 128)
j = int(n[1] * 10 + 128)
@@ -176,9 +174,7 @@ def plot_gradient(image, gradient, more=None, direction=-1):
i, j = max(i, 0), max(j, 0)
image.line([(x, y), (x, y)], fill=(0, j, i))
else:
- raise ValueError( # pragma: no cover
- f"Unexpected value for direction={direction}"
- )
+ raise ValueError(f"Unexpected value for direction={direction}")
return image_
@@ -207,7 +203,7 @@ def plot_segments(image, segments, outfile=None, color=(255, 0, 0)):
def detect_segments(
image,
proba_bin=1.0 / 16,
- cos_angle=math.cos(1.0 / 16 / 2 * (math.pi * 2)),
+ cos_angle=math.cos(1.0 / 16 / 2 * (math.pi * 2)), # noqa: B008
seuil_nfa=1e-5,
seuil_norme=2,
angle=math.pi / 24.0,
@@ -259,7 +255,7 @@ def detect_segments(
# on cree une classe permettant de recevoir les informations relatives
# a l'image et au gradient pour un segment reliant deux points
# du contour de l'image
- points = [InformationPoint(Point(0, 0), False, 0) for i in range(0, xx + yy)]
+ points = [InformationPoint(Point(0, 0), False, 0) for i in range(xx + yy)]
ligne = LigneGradient(points, seuil_norme=seuil_norme, seuil_nfa=seuil_nfa)
# premier segment
@@ -287,12 +283,12 @@ def detect_segments(
not_aligned += 1
# on passe au segment suivant
- cont = seg.next() # pylint: disable=E1102
+ cont = seg.next()
n += 1
# pour verifier que cela avance
if verbose and n % 1000 == 0:
- print( # pragma: no cover
+ print(
"n = ",
n,
" ... ",
diff --git a/mlstatpy/image/detection_segment/detection_segment_bord.py b/mlstatpy/image/detection_segment/detection_segment_bord.py
index 13a1cabf..8aac9233 100644
--- a/mlstatpy/image/detection_segment/detection_segment_bord.py
+++ b/mlstatpy/image/detection_segment/detection_segment_bord.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import copy
import numpy
from .geometrie import Segment, Point
diff --git a/mlstatpy/image/detection_segment/detection_segment_segangle.py b/mlstatpy/image/detection_segment/detection_segment_segangle.py
index d38b7582..b488dcf1 100644
--- a/mlstatpy/image/detection_segment/detection_segment_segangle.py
+++ b/mlstatpy/image/detection_segment/detection_segment_segangle.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import math
import copy
from .detection_segment_bord import SegmentBord_Commun
diff --git a/mlstatpy/image/detection_segment/geometrie.py b/mlstatpy/image/detection_segment/geometrie.py
index 5279531e..2070f2f4 100644
--- a/mlstatpy/image/detection_segment/geometrie.py
+++ b/mlstatpy/image/detection_segment/geometrie.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import math
import copy
import numpy
diff --git a/mlstatpy/image/detection_segment/queue_binom.py b/mlstatpy/image/detection_segment/queue_binom.py
index aad6074e..faa4c31f 100644
--- a/mlstatpy/image/detection_segment/queue_binom.py
+++ b/mlstatpy/image/detection_segment/queue_binom.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-
-
def tabule_queue_binom(n, p):
"""
Retourne un dictionnaire dont la clé est couple d'entiers *(a,b)*
diff --git a/mlstatpy/image/detection_segment/random_image.py b/mlstatpy/image/detection_segment/random_image.py
index 26b3125a..5bac8ffc 100644
--- a/mlstatpy/image/detection_segment/random_image.py
+++ b/mlstatpy/image/detection_segment/random_image.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
-
import math
import numpy
-import numpy.random as nprnd # pylint: disable=E1101
+import numpy.random as nprnd
def random_noise_image(size, ratio=0.1):
@@ -51,7 +49,7 @@ def move_coordinate(x1, y1, x2, y2, X, Y):
lmin = int(mind * lmin)
lmax = int(mind * lmax)
size = nprnd.randint(lmin, lmax)
- angle = nprnd.random() * math.pi # pylint: disable=E1101
+ angle = nprnd.random() * math.pi
x1 = nprnd.randint(image.shape[1] - int(size * abs(math.cos(angle)) - 1))
y1 = nprnd.randint(image.shape[0] - int(size * math.sin(angle) - 1))
x2 = x1 + size * math.cos(angle)
@@ -62,27 +60,21 @@ def move_coordinate(x1, y1, x2, y2, X, Y):
t = nprnd.randint(0, size, int(size * density))
xs = t * math.cos(angle) + x1
ys = t * math.sin(angle) + x2
- noise = (
- nprnd.randn(xs.shape[0] * 2).reshape(xs.shape[0], 2) # pylint: disable=E1101
- * noise
- * mind
- )
+ noise = nprnd.randn(xs.shape[0] * 2).reshape(xs.shape[0], 2) * noise * mind
xs += noise[:, 0]
ys += noise[:, 1]
- xs = numpy.maximum(xs, numpy.zeros(xs.shape[0])) # pylint: disable=E1111
- ys = numpy.maximum(
- ys, numpy.zeros(xs.shape[0])
- ) # pylint: disable=E1111,E1101,E1136
+ xs = numpy.maximum(xs, numpy.zeros(xs.shape[0]))
+ ys = numpy.maximum(ys, numpy.zeros(xs.shape[0]))
xs = numpy.minimum(
- xs, numpy.zeros(xs.shape[0]) + image.shape[1] - 1 # pylint: disable=E1101,E1136
- ) # pylint: disable=E1111,E1101,E1136
+ xs,
+ numpy.zeros(xs.shape[0]) + image.shape[1] - 1,
+ )
ys = numpy.minimum(
- ys, numpy.zeros(xs.shape[0]) + image.shape[0] - 1 # pylint: disable=E1101,E1136
- ) # pylint: disable=E1111,E1101,E1136
- xs = xs.astype(numpy.int32) # pylint: disable=E1101
- ys = ys.astype(numpy.int32) # pylint: disable=E1101
+ ys,
+ numpy.zeros(xs.shape[0]) + image.shape[0] - 1,
+ )
+ xs = xs.astype(numpy.int32)
+ ys = ys.astype(numpy.int32)
image[ys, xs] = 1
- res = dict(
- size=size, angle=angle, x1=x1, y1=y1, x2=x2, y2=y2, nbpoints=xs.shape[0]
- ) # pylint: disable=E1136
+ res = dict(size=size, angle=angle, x1=x1, y1=y1, x2=x2, y2=y2, nbpoints=xs.shape[0])
return res
diff --git a/mlstatpy/ml/_neural_tree_api.py b/mlstatpy/ml/_neural_tree_api.py
index 70e525a9..5df399dd 100644
--- a/mlstatpy/ml/_neural_tree_api.py
+++ b/mlstatpy/ml/_neural_tree_api.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import numpy
from ..optim import SGDOptimizer
@@ -12,7 +10,7 @@ class _TrainingAPI:
@property
def training_weights(self):
"Returns the weights."
- raise NotImplementedError("This should be overwritten.") # pragma: no cover
+ raise NotImplementedError("This should be overwritten.")
def update_training_weights(self, grad, add=True):
"""
@@ -21,25 +19,25 @@ def update_training_weights(self, grad, add=True):
:param grad: vector to add to the weights such as gradient
:param add: addition or replace
"""
- raise NotImplementedError("This should be overwritten.") # pragma: no cover
+ raise NotImplementedError("This should be overwritten.")
def fill_cache(self, X):
"""
Creates a cache with intermediate results.
"""
- return None # pragma: no cover
+ return None
def loss(self, X, y, cache=None):
"""
Computes the loss. Returns a float.
"""
- raise NotImplementedError("This should be overwritten.") # pragma: no cover
+ raise NotImplementedError("This should be overwritten.")
def dlossds(self, X, y, cache=None):
"""
Computes the loss derivative due to prediction error.
"""
- raise NotImplementedError("This should be overwritten.") # pragma: no cover
+ raise NotImplementedError("This should be overwritten.")
def gradient_backward(self, graddx, X, inputs=False, cache=None):
"""
@@ -52,7 +50,7 @@ def gradient_backward(self, graddx, X, inputs=False, cache=None):
:param cache: cache intermediate results to avoid more computation
:return: gradient
"""
- raise NotImplementedError("This should be overwritten.") # pragma: no cover
+ raise NotImplementedError("This should be overwritten.")
def gradient(self, X, y, inputs=False):
"""
@@ -65,10 +63,10 @@ def gradient(self, X, y, inputs=False):
:return: gradient
"""
if len(X.shape) != 1:
- raise ValueError( # pragma: no cover
+ raise ValueError(
f"X must a vector of one dimension but has shape {X.shape}."
)
- cache = self.fill_cache(X) # pylint: disable=E1128
+ cache = self.fill_cache(X)
dlossds = self.dlossds(X, y, cache=cache)
return self.gradient_backward(dlossds, X, inputs=inputs, cache=cache)
diff --git a/mlstatpy/ml/_neural_tree_node.py b/mlstatpy/ml/_neural_tree_node.py
index 2d482802..bd49c467 100644
--- a/mlstatpy/ml/_neural_tree_node.py
+++ b/mlstatpy/ml/_neural_tree_node.py
@@ -1,8 +1,6 @@
-# coding: utf-8
-
import numpy
import numpy.random as rnd
-from scipy.special import expit, softmax, kl_div as kl_fct # pylint: disable=E0611
+from scipy.special import expit, softmax, kl_div as kl_fct
from ._neural_tree_api import _TrainingAPI
@@ -82,9 +80,7 @@ def get_activation_function(activation):
return NeuralTreeNode._leakyrelu
if activation == "identity":
return lambda x: x
- raise ValueError( # pragma: no cover
- f"Unknown activation function '{activation}'."
- )
+ raise ValueError(f"Unknown activation function '{activation}'.")
@staticmethod
def get_activation_gradient_function(activation):
@@ -114,9 +110,7 @@ def get_activation_gradient_function(activation):
return NeuralTreeNode._dleakyrelu
if activation == "identity":
return lambda x: numpy.ones(x.shape, dtype=x.dtype)
- raise ValueError( # pragma: no cover
- f"Unknown activation gradient function '{activation}'."
- )
+ raise ValueError(f"Unknown activation gradient function '{activation}'.")
@staticmethod
def get_activation_loss_function(activation):
@@ -169,9 +163,7 @@ def dregdx(x, y):
return (x - y) * 2
return dregdx
- raise ValueError( # pragma: no cover
- f"Unknown activation function '{activation}'."
- )
+ raise ValueError(f"Unknown activation function '{activation}'.")
def __init__(self, weights, bias=None, activation="sigmoid", nodeid=-1, tag=None):
self.tag = tag
@@ -201,9 +193,7 @@ def __init__(self, weights, bias=None, activation="sigmoid", nodeid=-1, tag=None
self.coef[:, 1:] = weights
self.coef[:, 0] = bias
else:
- raise RuntimeError( # pragma: no cover
- f"Unexpected weights shape: {weights.shape}"
- )
+ raise RuntimeError(f"Unexpected weights shape: {weights.shape}")
self.activation = activation
self.nodeid = nodeid
@@ -253,9 +243,7 @@ def __setstate__(self, state):
def __eq__(self, obj):
if self.coef.shape != obj.coef.shape:
return False
- if any(
- map(lambda xy: xy[0] != xy[1], zip(self.coef.ravel(), obj.coef.ravel()))
- ):
+ if any(xy[0] != xy[1] for xy in zip(self.coef.ravel(), obj.coef.ravel())):
return False
if self.activation != obj.activation:
return False
@@ -310,7 +298,7 @@ def training_weights(self):
"Returns the weights stored in the neuron."
return self.coef.ravel()
- def update_training_weights(self, X, add=True): # pylint: disable=W0237
+ def update_training_weights(self, X, add=True):
"""
Updates weights.
@@ -349,8 +337,8 @@ def loss(self, X, y, cache=None):
"""
act = self._common_loss_dloss(X, y, cache=cache)
if len(X.shape) == 1:
- return self.losss_(act, y) # pylint: disable=E1120
- return self.losss_(act, y) # pylint: disable=E1120
+ return self.losss_(act, y)
+ return self.losss_(act, y)
def dlossds(self, X, y, cache=None):
"""
diff --git a/mlstatpy/ml/kppv.py b/mlstatpy/ml/kppv.py
index be38edf0..1ee1069c 100644
--- a/mlstatpy/ml/kppv.py
+++ b/mlstatpy/ml/kppv.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
import numpy
import numpy.linalg
from scipy.spatial.distance import euclidean
@@ -16,7 +14,6 @@ def __init__(self):
"""
constructeur
"""
- pass
def fit(self, X, y=None):
"""
@@ -38,13 +35,9 @@ def kneighbors(self, X, n_neighbors=1, return_distance=True):
@return array (dist), array (indices)
"""
if n_neighbors != 1:
- raise NotImplementedError( # pragma: no cover
- "Not implemented when n_neighbors != 1."
- )
+ raise NotImplementedError("Not implemented when n_neighbors != 1.")
if not return_distance:
- raise NotImplementedError( # pragma: no cover
- "Not implemented when return_distance is False."
- )
+ raise NotImplementedError("Not implemented when return_distance is False.")
dist = numpy.zeros(X.shape[0])
ind = numpy.zeros(X.shape[0], dtype=numpy.int64)
diff --git a/mlstatpy/ml/kppv_laesa.py b/mlstatpy/ml/kppv_laesa.py
index c4b5e588..5d4277ab 100644
--- a/mlstatpy/ml/kppv_laesa.py
+++ b/mlstatpy/ml/kppv_laesa.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import random
import numpy
from .kppv import NuagePoints
@@ -51,7 +49,7 @@ def selection_pivots(self, nb):
# on calcule aussi la distance de chaque éléments au pivots
self.dist = numpy.zeros((self.nuage.shape[0], len(self.pivots)))
for i in range(self.nuage.shape[0]):
- for j in range(len(self.pivots)): # pylint: disable=C0200
+ for j in range(len(self.pivots)):
self.dist[i, j] = self.distance(
self.nuage[i, :], self.nuage[self.pivots[j], :]
)
@@ -75,10 +73,10 @@ def ppv(self, obj):
dm, im, _ = min(dp)
# améliorations
- for i in range(0, self.nuage.shape[0]):
+ for i in range(self.nuage.shape[0]):
# on regarde si un pivot permet d'éliminer l'élément i
calcul = True
- for d, p, ip in dp:
+ for d, _p, ip in dp:
delta = abs(d - self.dist[i, ip])
if delta > dm:
calcul = False
diff --git a/mlstatpy/ml/logreg.py b/mlstatpy/ml/logreg.py
index c2534724..bd80ff51 100644
--- a/mlstatpy/ml/logreg.py
+++ b/mlstatpy/ml/logreg.py
@@ -39,9 +39,9 @@ def plot_ds(X, y, ax=None, title=None):
features, *y* contains the binary labels.
"""
if ax is None:
- import matplotlib.pyplot as plt # pragma: no cover
+ import matplotlib.pyplot as plt
- ax = plt.gca() # pragma: no cover
+ ax = plt.gca()
colors = {0: "#88CCCC", 1: "#CCCC88"}
c = [colors[_] for _ in y]
ax.scatter(X[:, 0], X[:, 1], c=c, s=20, edgecolor="k", lw=0.1)
diff --git a/mlstatpy/ml/matrices.py b/mlstatpy/ml/matrices.py
index e576aed2..1710131a 100644
--- a/mlstatpy/ml/matrices.py
+++ b/mlstatpy/ml/matrices.py
@@ -1,12 +1,12 @@
import warnings
import numpy
import numpy.linalg
-from scipy.linalg.lapack import dtrtri # pylint: disable=E0611
+from scipy.linalg.lapack import dtrtri
def gram_schmidt(mat, change=False):
"""
- Applies the `Gram–Schmidt process
+ Applies the `Gram-Schmidt process
    <https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process>`_.
Due to performance, every row is considered as a vector.
@@ -35,9 +35,9 @@ def gram_schmidt(mat, change=False):
print(P)
"""
if len(mat.shape) != 2:
- raise ValueError("mat must be a matrix.") # pragma: no cover
+ raise ValueError("mat must be a matrix.")
if mat.shape[1] < mat.shape[0]:
- raise RuntimeError( # pragma: no cover
+ raise RuntimeError(
"The function only works if the number of rows is less "
"than the number of columns."
)
@@ -45,9 +45,9 @@ def gram_schmidt(mat, change=False):
base = numpy.identity(mat.shape[0])
# The following code is equivalent to:
# res = numpy.empty(mat.shape)
- # for i in range(0, mat.shape[0]):
+ # for i in range(mat.shape[0]):
# res[i, :] = mat[i, :]
- # for j in range(0, i):
+ # for j in range(i):
# d = numpy.dot(res[j, :], mat[i, :])
# res[i, :] -= res[j, :] * d
# if change:
@@ -60,7 +60,7 @@ def gram_schmidt(mat, change=False):
# base[i, :] /= d
# But it is faster to write it this way:
res = numpy.empty(mat.shape)
- for i in range(0, mat.shape[0]):
+ for i in range(mat.shape[0]):
res[i, :] = mat[i, :]
if i > 0:
d = numpy.dot(res[:i, :], mat[i, :])
@@ -118,8 +118,9 @@ def linear_regression(X, y, algo=None):
to the algorithm).
"""
if len(y.shape) != 1:
- warnings.warn( # pragma: no cover
- "This function is not tested for a multidimensional linear regression."
+ warnings.warn(
+ "This function is not tested for a multidimensional linear regression.",
+ stacklevel=0,
)
if algo is None:
inv = numpy.linalg.inv(X.T @ X)
@@ -129,11 +130,11 @@ def linear_regression(X, y, algo=None):
# T = P X
return (y.T @ T.T @ P).ravel()
if algo == "qr":
- Q, R = numpy.linalg.qr(X, "full")
+ Q, R = numpy.linalg.qr(X, "reduced")
Ri = dtrtri(R)[0]
gamma = (y.T @ Q).ravel()
return (gamma @ Ri.T).ravel()
- raise ValueError(f"Unknwown algo='{algo}'.") # pragma: no cover
+    raise ValueError(f"Unknown algo='{algo}'.")
def norm2(X):
@@ -162,11 +163,11 @@ def streaming_gram_schmidt_update(Xk, Pk):
tki = Pk @ Xk
idi = numpy.identity(Pk.shape[0])
- for i in range(0, Pk.shape[0]):
+ for i in range(Pk.shape[0]):
val = tki[i]
if i > 0:
- # for j in range(0, i):
+ # for j in range(i):
# d = tki[j] * val
# tki[i] -= tki[j] * d
# Pk[i, :] -= Pk[j, :] * d
@@ -178,7 +179,7 @@ def streaming_gram_schmidt_update(Xk, Pk):
Pk[i, :] -= numpy.multiply(Pk[:i, :], dv).sum(axis=0)
idi[i, :] -= numpy.multiply(idi[:i, :], dv).sum(axis=0)
- d = numpy.square(idi[i, :]).sum() # pylint: disable=E1101
+ d = numpy.square(idi[i, :]).sum()
d = tki[i] ** 2 + d
if d > 0:
d **= 0.5
@@ -219,7 +220,7 @@ def streaming_gram_schmidt(mat, start=None):
print(t.T @ t)
"""
if len(mat.shape) != 2:
- raise ValueError("mat must be a matrix.") # pragma: no cover
+ raise ValueError("mat must be a matrix.")
if mat.shape[1] < mat.shape[0]:
raise RuntimeError(
"The function only works if the number of rows is less "
@@ -281,15 +282,16 @@ def streaming_linear_regression(mat, y, start=None):
print("iteration", i, bk, bk0)
"""
if len(mat.shape) != 2:
- raise ValueError("mat must be a matrix.") # pragma: no cover
+ raise ValueError("mat must be a matrix.")
if mat.shape[0] < mat.shape[1]:
raise RuntimeError(
"The function only works if the number of rows is more "
"than the number of columns."
)
if len(y.shape) != 1:
- warnings.warn( # pragma: no cover
- "This function is not tested for a multidimensional linear regression."
+ warnings.warn(
+ "This function is not tested for a multidimensional linear regression.",
+ stacklevel=0,
)
if start is None:
start = mat.shape[1]
@@ -352,15 +354,16 @@ def streaming_linear_regression_gram_schmidt(mat, y, start=None):
print("iteration", i, bk, bk0)
"""
if len(mat.shape) != 2:
- raise ValueError("mat must be a matrix.") # pragma: no cover
+ raise ValueError("mat must be a matrix.")
if mat.shape[0] < mat.shape[1]:
raise RuntimeError(
"The function only works if the number of rows is more "
"than the number of columns."
)
if len(y.shape) != 1:
- warnings.warn( # pragma: no cover
- "This function is not tested for a multidimensional linear regression."
+ warnings.warn(
+ "This function is not tested for a multidimensional linear regression.",
+ stacklevel=0,
)
if start is None:
start = mat.shape[1]
diff --git a/mlstatpy/ml/neural_tree.py b/mlstatpy/ml/neural_tree.py
index 678e6f1e..8bd9a34d 100644
--- a/mlstatpy/ml/neural_tree.py
+++ b/mlstatpy/ml/neural_tree.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-
from io import BytesIO
import pickle
import numpy
@@ -273,7 +271,7 @@ def _create_from_tree_one(tree, k=1.0):
root = NeuralTreeNet(tree.max_features_, empty=True)
feat_index = numpy.arange(0, max_features_)
predecessor = {}
- outputs = {i: [] for i in range(0, tree.n_classes_)}
+ outputs = {i: [] for i in range(tree.n_classes_)}
for i in range(n_nodes):
if children_left[i] != children_right[i]:
# node with a threshold
@@ -327,12 +325,12 @@ def _create_from_tree_one(tree, k=1.0):
output = []
index = [0]
nb = []
- for i in range(0, tree.n_classes_):
+ for i in range(tree.n_classes_):
output.extend(outputs[i])
nb.append(len(outputs[i]))
index.append(len(outputs[i]) + index[-1])
coef = numpy.zeros((len(nb), len(output)), dtype=numpy.float64)
- for i in range(0, tree.n_classes_):
+ for i in range(tree.n_classes_):
coef[i, index[i] : index[i + 1]] = k
feat = [root[n.nodeid][1]["output"] for n in output]
root.append(
@@ -528,7 +526,7 @@ def to_dot(self, X=None):
labels = {}
- for i in range(0, len(self)): # pylint: disable=C0200
+ for i in range(len(self)):
o = self[i][1]["output"]
if isinstance(o, int):
lo = str(o)
@@ -598,7 +596,7 @@ def training_weights(self):
pos += s
return res
- def update_training_weights(self, X, add=True): # pylint: disable=W0237
+ def update_training_weights(self, X, add=True):
"""
Updates weights.
@@ -669,8 +667,8 @@ def loss(self, X, y, cache=None):
"""
res, _, last_node, last_attr = self._common_loss_dloss(X, y, cache=cache)
if len(res.shape) <= 1:
- return last_node.loss(res[last_attr["inputs"]], y) # pylint: disable=E1120
- return last_node.loss(res[:, last_attr["inputs"]], y) # pylint: disable=E1120
+ return last_node.loss(res[last_attr["inputs"]], y)
+ return last_node.loss(res[:, last_attr["inputs"]], y)
def dlossds(self, X, y, cache=None):
"""
@@ -678,12 +676,8 @@ def dlossds(self, X, y, cache=None):
"""
res, _, last_node, last_attr = self._common_loss_dloss(X, y, cache=cache)
if len(res.shape) <= 1:
- return last_node.dlossds(
- res[last_attr["inputs"]], y
- ) # pylint: disable=E1120
- return last_node.dlossds(
- res[:, last_attr["inputs"]], y
- ) # pylint: disable=E1120
+ return last_node.dlossds(res[last_attr["inputs"]], y)
+ return last_node.dlossds(res[:, last_attr["inputs"]], y)
def gradient_backward(self, graddx, X, inputs=False, cache=None):
"""
@@ -847,7 +841,7 @@ def onnx_converter():
Converts this model into ONNX.
"""
from skl2onnx.common.data_types import guess_numpy_type
- from skl2onnx.algebra.onnx_ops import ( # pylint: disable=E0611
+ from skl2onnx.algebra.onnx_ops import (
OnnxIdentity,
OnnxArgMax,
OnnxAdd,
diff --git a/mlstatpy/ml/roc.py b/mlstatpy/ml/roc.py
index 015c6f17..547ad067 100644
--- a/mlstatpy/ml/roc.py
+++ b/mlstatpy/ml/roc.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import math
import itertools
from enum import Enum
@@ -64,9 +62,7 @@ def __init__(self, y_true=None, y_score=None, sample_weight=None, df=None):
else:
self.data = pandas.DataFrame(df, columns=["score", "label", "weight"])
elif not isinstance(df, pandas.DataFrame):
- raise TypeError( # pragma: no cover
- f"df should be a DataFrame, not {type(df)}"
- )
+ raise TypeError(f"df should be a DataFrame, not {type(df)}")
else:
self.data = df.copy()
self.data.sort_values(self.data.columns[0], inplace=True)
@@ -140,9 +136,9 @@ def confusion(self, score=None, nb=10, curve=CurveType.ROC, bootstrap=False):
cloud["cw"] = cloud[cloud.columns[2]].cumsum()
cloud["clw"] = cloud["lw"].cumsum()
if cloud.columns[4] != "cw":
- raise ValueError("Column 4 should be 'cw'.") # pragma: no cover
+ raise ValueError("Column 4 should be 'cw'.")
if cloud.columns[5] != "clw":
- raise ValueError("Column 5 should be 'clw'.") # pragma: no cover
+ raise ValueError("Column 5 should be 'clw'.")
pos_roc = 0
pos_seuil = 0
@@ -183,7 +179,7 @@ def confusion(self, score=None, nb=10, curve=CurveType.ROC, bootstrap=False):
roc.iloc[pos_roc:, 3] = 0
roc.iloc[pos_roc:, 4] = min(cloud.iloc[:, 0])
return roc
- raise NotImplementedError( # pragma: no cover
+ raise NotImplementedError(
f"Unexpected type '{curve}', only ROC is allowed."
)
@@ -191,9 +187,7 @@ def confusion(self, score=None, nb=10, curve=CurveType.ROC, bootstrap=False):
roc = self.confusion(nb=len(self), curve=curve, bootstrap=False, score=None)
roc = roc[roc["threshold"] <= score]
if len(roc) == 0:
- raise ValueError( # pragma: no cover
- f"The requested confusion is empty for score={score}."
- )
+ raise ValueError(f"The requested confusion is empty for score={score}.")
return roc[:1]
def precision(self):
@@ -241,9 +235,7 @@ def compute_roc_curve(self, nb=100, curve=CurveType.ROC, bootstrap=False):
if curve is ROC.CurveType.SKROC:
if nb > 0:
- raise NotImplementedError( # pragma: no cover
- "nb must be <= 0 si curve is SKROC"
- )
+                raise NotImplementedError("nb must be <= 0 if curve is SKROC")
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(
@@ -275,9 +267,9 @@ def compute_roc_curve(self, nb=100, curve=CurveType.ROC, bootstrap=False):
cloud["clw"] = cloud["lw"].cumsum()
sum_weights_ans = cloud["lw"].sum()
if cloud.columns[4] != "cw":
- raise ValueError("Column 4 should be 'cw'.") # pragma: no cover
+ raise ValueError("Column 4 should be 'cw'.")
if cloud.columns[5] != "clw":
- raise ValueError("Column 5 should be 'clw'.") # pragma: no cover
+ raise ValueError("Column 5 should be 'clw'.")
pos_roc = 0
pos_seuil = 0
@@ -325,9 +317,7 @@ def compute_roc_curve(self, nb=100, curve=CurveType.ROC, bootstrap=False):
roc.iloc[pos_roc:, 2] = cloud.iloc[-1, 0]
else:
- raise NotImplementedError( # pragma: no cover
- f"Unknown curve type '{curve}'."
- )
+ raise NotImplementedError(f"Unknown curve type '{curve}'.")
return roc
@@ -372,10 +362,10 @@ def plot(
ckwargs["legend"] = False
if "label" in ckwargs:
del ckwargs["label"]
- for _ in range(0, bootstrap):
+ for _ in range(bootstrap):
roc = self.compute_roc_curve(nb, curve=curve, bootstrap=True)
if thresholds:
- cols = list(_ for _ in roc.columns if _ != "threshold")
+ cols = [_ for _ in roc.columns if _ != "threshold"]
roc = roc.sort_values("threshold").reset_index(drop=True)
ax = roc.plot(
x="threshold",
@@ -385,7 +375,7 @@ def plot(
**ckwargs,
)
else:
- cols = list(_ for _ in roc.columns[1:] if _ != "threshold")
+ cols = [_ for _ in roc.columns[1:] if _ != "threshold"]
roc = roc.sort_values(roc.columns[0]).reset_index(drop=True)
ax = roc.plot(
x=roc.columns[0],
@@ -404,13 +394,11 @@ def plot(
if not thresholds:
roc = roc[[_ for _ in roc.columns if _ != "threshold"]]
- cols = list(_ for _ in roc.columns if _ != "threshold")
+ cols = [_ for _ in roc.columns if _ != "threshold"]
final = 0
if thresholds:
if "label" in kwargs and len(cols) != len(kwargs["label"]):
- raise ValueError( # pragma: no cover
- f"label must have {len(cols)} values"
- )
+ raise ValueError(f"label must have {len(cols)} values")
roc = roc.sort_values("threshold").reset_index(drop=True)
ax = roc.plot(x="threshold", y=cols, ax=ax, **kwargs)
ax.set_ylim([0, 1])
@@ -466,7 +454,7 @@ def auc(self, cloud=None):
elif a[0] >= b[0]:
auc += a[2] * b[2] / 2
if auc == 0 and good.shape[0] + wrong.shape[0] < self.data.shape[0]:
- raise ValueError( # pragma: no cover
+ raise ValueError(
"Label are not right, expect 0 and 1 not {0}".format(
set(cloud[cloud.columns[1]])
)
@@ -485,9 +473,9 @@ def auc_interval(self, bootstrap=10, alpha=0.95):
@return dictionary of values
"""
if bootstrap <= 1:
- raise ValueError("Use auc instead, bootstrap < 2") # pragma: no cover
+ raise ValueError("Use auc instead, bootstrap < 2")
rate = []
- for _ in range(0, bootstrap):
+ for _ in range(bootstrap):
cloud = self.random_cloud()
auc = self.auc(cloud)
rate.append(auc)
@@ -555,7 +543,7 @@ def roc_intersect_interval(
"""
rate = []
- for _ in range(0, bootstrap):
+ for _ in range(bootstrap):
roc = self.compute_roc_curve(nb, curve=curve, bootstrap=True)
r = self.roc_intersect(roc, x)
rate.append(r)
diff --git a/mlstatpy/ml/voronoi.py b/mlstatpy/ml/voronoi.py
index d2b5f481..2163c16a 100644
--- a/mlstatpy/ml/voronoi.py
+++ b/mlstatpy/ml/voronoi.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
import warnings
import numpy
from sklearn.linear_model import LinearRegression
@@ -12,8 +10,6 @@ class VoronoiEstimationError(Exception):
Raised when the algorithm failed.
"""
- pass
-
def voronoi_estimation_from_lr(
L, B, C=None, D=None, cl=0, qr=True, max_iter=None, verbose=False
@@ -73,7 +69,7 @@ def voronoi_estimation_from_lr(
nb_constraints = numpy.zeros((L.shape[0],))
matL = []
matB = []
- for i in range(0, L.shape[0]):
+ for i in range(L.shape[0]):
for j in range(i + 1, L.shape[0]):
li = L[i, :]
lj = L[j, :]
@@ -104,7 +100,7 @@ def voronoi_estimation_from_lr(
continue
found = True
if not found:
- raise ValueError( # pragma: no cover
+ raise ValueError(
"Matrix L has two similar rows {0} and {1}. "
"Problem cannot be solved.".format(i, j)
)
@@ -132,8 +128,9 @@ def voronoi_estimation_from_lr(
if nbeq * 2 <= L.shape[0] * L.shape[1]:
if C is None and D is None:
- warnings.warn( # pragma: no cover
- "[voronoi_estimation_from_lr] Additional condition are required."
+ warnings.warn(
+ "[voronoi_estimation_from_lr] Additional condition are required.",
+ stacklevel=0,
)
if C is not None and D is not None:
matL = numpy.vstack([matL, numpy.zeros((1, matL.shape[1]))])
@@ -144,7 +141,7 @@ def voronoi_estimation_from_lr(
raise TypeError(f"D must be a float not {type(D)}")
matB = numpy.hstack([matB, [D]])
elif C is None and D is None:
- pass # pragma: no cover
+ pass
else:
raise ValueError("C and D must be None together or not None together.")
@@ -152,7 +149,7 @@ def voronoi_estimation_from_lr(
tol = numpy.abs(matL.ravel()).max() * 1e-8 / matL.shape[0]
order_removed = []
removed = set()
- for it in range(0, max(max_iter, 1)):
+ for it in range(max(max_iter, 1)):
if qr:
clr = QuantileLinearRegression(
fit_intercept=False, max_iter=max(matL.shape)
@@ -210,7 +207,7 @@ def voronoi_estimation_from_lr(
break
pos -= 1
if pos < 0:
- raise VoronoiEstimationError( # pragma: no cover
+ raise VoronoiEstimationError(
"Two classes have been merged in a single Voronoi point "
"(dist={0} < {1}). max_iter should be lower than "
"{2}".format(dist[-1][0], tol, it)
diff --git a/mlstatpy/nlp/completion.py b/mlstatpy/nlp/completion.py
index 6c2b402f..83e7f558 100644
--- a/mlstatpy/nlp/completion.py
+++ b/mlstatpy/nlp/completion.py
@@ -116,8 +116,8 @@ def iter_leaves(self, max_weight=None) -> Iterator[Tuple[float, str]]:
def iter_local(node):
if node.leave and (max_weight is None or node.weight <= max_weight):
yield node.weight, None, node.value
- for w, k, v in sorted(node.items()):
- for w_, k_, v_ in iter_local(v):
+ for _w, _k, v in sorted(node.items()):
+ for w_, k_, v_ in iter_local(v): # noqa: UP028
yield w_, k_, v_
for w, _, v in sorted(iter_local(self)):
@@ -150,7 +150,7 @@ def all_completions(self) -> List[Tuple["CompletionTrieNode", List[str]]]:
nodes.reverse()
all_res = []
for node in nodes:
- res = list(n[1] for n in node.iter_leaves())
+ res = [n[1] for n in node.iter_leaves()]
all_res.append((node, res))
all_res.reverse()
return all_res
@@ -256,7 +256,7 @@ def build(words) -> "CompletionTrieNode":
new_node.weight = w
if disp is not None:
new_node.disp = disp
- nb += 1
+ nb += 1 # noqa: SIM113
root.weight = minw
return root
@@ -314,7 +314,7 @@ def min_keystroke(self, word: str) -> Tuple[int, int]:
metric = len(word)
best = len(word)
for node in nodes[1:]:
- res = list(n[1] for n in node.iter_leaves())
+ res = [n[1] for n in node.iter_leaves()]
ind = res.index(word)
m = len(node.value) + ind + 1
if m < metric:
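
# Quick check of the completion trie touched above: build and min_keystroke
# are shown in the diff; the (weight, word) input format for build is an
# assumption about the expected input.
from mlstatpy.nlp.completion import CompletionTrieNode

trie = CompletionTrieNode.build([(1, "macaroon"), (2, "machine"), (3, "macadamia")])
print(trie.min_keystroke("machine"))  # returns a Tuple[int, int], per the signature above
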
diff --git a/mlstatpy/nlp/completion_simple.py b/mlstatpy/nlp/completion_simple.py
index 42731e9d..5477de60 100644
--- a/mlstatpy/nlp/completion_simple.py
+++ b/mlstatpy/nlp/completion_simple.py
@@ -1,4 +1,4 @@
-from typing import Tuple, List, Iterator, Dict
+from typing import Tuple, List, Iterator, Dict, Optional
from .completion import CompletionTrieNode
@@ -125,7 +125,7 @@ def str_all_completions(self, maxn=10, use_precompute=True) -> str:
completions = self._info._completions.get(prefix, [])
for i2, el in enumerate(completions):
ar = " " if el.value != self.value else "-> "
- add = "{5}{0}:{1} -- {2}{4}-- {3}".format(
+ add = "{5}{0}:{1} -- {2}{4}-- {3}".format( # noqa: UP030
i2,
el.weight,
el.value,
@@ -139,7 +139,7 @@ def str_all_completions(self, maxn=10, use_precompute=True) -> str:
return "\n".join(rows)
def init_metrics(
- self, position: int, completions: List["CompletionElement"] = None
+ self, position: int, completions: Optional[List["CompletionElement"]] = None
):
"""
Initializes the metrics.
@@ -198,7 +198,7 @@ def update_metrics(
position: int,
improved: dict,
delta: float,
- completions: List["CompletionElement"] = None,
+ completions: Optional[List["CompletionElement"]] = None,
iteration=-1,
):
"""
@@ -379,9 +379,7 @@ def find(self, value: str, is_sorted=False) -> CompletionElement:
:return: element or None
"""
if is_sorted:
- raise NotImplementedError( # pragma: no cover
- "No optimisation for the sorted case."
- )
+ raise NotImplementedError("No optimisation for the sorted case.")
for e in self:
if e.value == value:
return e
@@ -411,24 +409,19 @@ def __iter__(self) -> Iterator[CompletionElement]:
"""
Iterates over elements.
"""
- for e in self._elements:
- yield e
+ yield from self._elements
def sort_values(self):
"""
sort the elements by value
"""
- self._elements = list(
- _[-1] for _ in sorted((e.value, e.weight, e) for e in self)
- )
+ self._elements = [_[-1] for _ in sorted((e.value, e.weight, e) for e in self)]
def sort_weight(self):
"""
Sorts the elements by value.
"""
- self._elements = list(
- _[-1] for _ in sorted((e.weight, e.value, e) for e in self)
- )
+ self._elements = [_[-1] for _ in sorted((e.weight, e.value, e) for e in self)]
def compare_with_trie(self, delta=0.8):
"""
@@ -439,7 +432,7 @@ def compare_with_trie(self, delta=0.8):
"""
def format_diff(el, f, diff):
- s = (
+ s = ( # noqa: UP030
"VALUE={0}\nSYST=[{1}]\nTRIE=[{2}]\nMORE SYSTEM:"
"\n{3}\n######\nMORE TRIE:\n{4}"
).format(
@@ -491,9 +484,7 @@ def compute_metrics(self, ffilter=None, delta=0.8, details=False) -> int:
"""
self.sort_weight()
if ffilter is not None:
- raise NotImplementedError( # pragma: no cover
- "ffilter not None is not implemented"
- )
+ raise NotImplementedError("ffilter not None is not implemented")
if details:
store_completions = {"": []}
@@ -516,8 +507,8 @@ def compute_metrics(self, ffilter=None, delta=0.8, details=False) -> int:
while updates > 0:
displayed = {}
updates = 0
- for i, el in enumerate(self._elements):
- for k in range(0, len(el.value)):
+ for _i, el in enumerate(self._elements):
+ for k in range(len(el.value)):
prefix = el.value[:k]
if prefix not in displayed:
displayed[prefix] = 0
@@ -621,7 +612,7 @@ def test_metric(self, qset: Iterator[Tuple[str, float]]) -> Dict[str, float]:
It then calls @see me enumerate_metric.
"""
res = dict(mks0=0.0, mks1=0.0, mks2=0.0, sum_weights=0.0, sum_wlen=0.0, n=0)
- hist = {k: {} for k in {"mks0", "mks1", "mks2", "l"}} # pylint: disable=C0208
+ hist = {k: {} for k in {"mks0", "mks1", "mks2", "l"}}
wei = {k: {} for k in hist}
res["hist"] = hist
res["histnow"] = wei
diff --git a/mlstatpy/nlp/normalize.py b/mlstatpy/nlp/normalize.py
index 98b3bae3..1ecd6cf7 100644
--- a/mlstatpy/nlp/normalize.py
+++ b/mlstatpy/nlp/normalize.py
@@ -1,20 +1,16 @@
-# -*- coding: utf-8 -*-
-
import unicodedata
def remove_diacritics(input_str):
"""
- remove diacritics
+ Removes diacritics.
- @param input_str string to clean
- @return cleaned string
+ :param input_str: string to clean
+ :return: cleaned string
Example::
enguérand --> enguerand
-
- .. versionadded:: 1.0
"""
nkfd_form = unicodedata.normalize("NFKD", input_str)
only_ascii = nkfd_form.encode("ASCII", "ignore")
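
# The docstring example above, runnable as-is.
from mlstatpy.nlp.normalize import remove_diacritics

print(remove_diacritics("enguérand"))  # -> enguerand
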
diff --git a/mlstatpy/optim/sgd.py b/mlstatpy/optim/sgd.py
index 190eeb8b..0d2ed4e9 100644
--- a/mlstatpy/optim/sgd.py
+++ b/mlstatpy/optim/sgd.py
@@ -1,5 +1,9 @@
import numpy
-from numpy.core._exceptions import UFuncTypeError
+
+try:
+ from numpy.core._exceptions import UFuncTypeError
+except ImportError:
+ UFuncTypeError = Exception
class BaseOptimizer:
@@ -68,16 +72,16 @@ def update_coef(self, grad):
if self.min_threshold is not None:
try:
self.coef = numpy.maximum(self.coef, self.min_threshold)
- except UFuncTypeError: # pragma: no cover
- raise RuntimeError( # pylint: disable=W0707
+ except UFuncTypeError:
+ raise RuntimeError( # noqa: B904
"Unable to compute an upper bound with coef={} "
"max_threshold={}".format(self.coef, self.min_threshold)
)
if self.max_threshold is not None:
try:
self.coef = numpy.minimum(self.coef, self.max_threshold)
- except UFuncTypeError: # pragma: no cover
- raise RuntimeError( # pylint: disable=W0707
+ except UFuncTypeError:
+ raise RuntimeError( # noqa: B904
"Unable to compute a lower bound with coef={} "
"max_threshold={}".format(self.coef, self.max_threshold)
)
@@ -87,7 +91,6 @@ def iteration_ends(self, time_step):
Performs update to learning rate and potentially other states at the
end of an iteration.
"""
- pass # pragma: no cover
def train(
self, X, y, fct_loss, fct_grad, max_iter=100, early_th=None, verbose=False
@@ -135,9 +138,7 @@ def train(
if isinstance(verbose, int) and verbose >= 10:
self._display_progress(0, max_iter, loss, grad, "grad")
if numpy.isnan(grad).sum() > 0:
- raise RuntimeError( # pragma: no cover
- "The gradient has nan values."
- )
+ raise RuntimeError("The gradient has nan values.")
self.update_coef(grad)
n_samples += 1
@@ -180,18 +181,18 @@ def _evaluate_early_stopping(self, it, max_iter, losses, early_th, verbose=False
return False
if numpy.isnan(losses[-5]):
if numpy.isnan(losses[-1]):
- if verbose: # pragma: no cover
+ if verbose:
self._display_progress(
it + 1, max_iter, losses[-1], losses=losses[-5:]
)
return True
return False
if numpy.isnan(losses[-1]):
- if verbose: # pragma: no cover
+ if verbose:
self._display_progress(it + 1, max_iter, losses[-1], losses=losses[-5:])
return True
if abs(losses[-1] - losses[-5]) <= early_th:
- if verbose: # pragma: no cover
+ if verbose:
self._display_progress(it + 1, max_iter, losses[-1], losses=losses[-5:])
return True
return False
@@ -327,9 +328,7 @@ def iteration_ends(self, time_step):
elif self.lr_schedule == "constant":
pass
else:
- raise ValueError( # pragma: no cover
- f"Unexpected value: lr_schedule='{self.lr_schedule}'."
- )
+ raise ValueError(f"Unexpected value: lr_schedule='{self.lr_schedule}'.")
def _get_updates(self, grad):
"""
diff --git a/mlstatpy/render_js_dot.py b/mlstatpy/render_js_dot.py
new file mode 100644
index 00000000..836674a7
--- /dev/null
+++ b/mlstatpy/render_js_dot.py
@@ -0,0 +1,403 @@
+import uuid
+import os
+import shutil
+import urllib.request as liburl
+import urllib.error as liberror
+import IPython.core.display as ipydisplay
+from IPython.display import display_html, display_javascript
+
+
+class UrlNotFoundError(Exception):
+ """
+ Raised when a url does not exist.
+ """
+
+ def __init__(self, url, code):
+ Exception.__init__(self, f"Url not found: returned code={code} for '{url}'")
+
+
+class JavascriptScriptError(ValueError):
+ """
+ Raised when the class does not find what it expects.
+ """
+
+
+def check_url(url):
+ "Checks urls."
+ try:
+ liburl.urlopen(url)
+ return True
+ except liberror.HTTPError as e:
+ raise UrlNotFoundError(url, e.code) from e
+ except liberror.URLError as e:
+ raise UrlNotFoundError(url, e.reason) from e
+ except Exception as e:
+ raise AssertionError(f"Issue with url '{url}'") from e
+
+
+class RenderJSRaw:
+ """
+ Adds :epkg:`javascript` into a notebook.
+
+ :param script: (str) script
+ :param width: (str) width
+ :param height: (str) height
+ :param style: (str) style (added as an inline ``style`` attribute on the div)
+ :param divid: (str|None) id of the div
+ :param css: (list) list of css
+ :param libs: (list) list of dependencies
+ :param only_html: (bool) use only function
+ `display_html <https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#IPython.display.display_html>`_
+ and not `display_javascript
+ <https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#IPython.display.display_javascript>`_ to add
+ javascript to the page.
+ :param div_class: (str) class of the section ``div`` which will host the results
+ of the javascript
+ :param check_urls: (bool) by default, check url exists
+ :param local: (bool) use local javascript files
+ """
+
+ def __init__(
+ self,
+ script,
+ width="100%",
+ height="100%",
+ divid=None,
+ css=None,
+ libs=None,
+ style=None,
+ only_html=False,
+ div_class=None,
+ check_urls=True,
+ local=False,
+ ):
+ self.script = script
+ self.uuid = divid if divid else "M" + str(uuid.uuid4()).replace("-", "")
+ if style is None:
+ style = ""
+ if width is not None and "width" not in style:
+ style += f"width:{width};"
+ if height is not None and "height" not in style:
+ style += f"height:{height};"
+ if not style:
+ style = None
+ else:
+ if width is not None and "width" not in style:
+ style += f"width:{width};"
+ if height is not None and "height" not in style:
+ style += f"height:{height};"
+ self.style = style
+ self.only_html = only_html
+ self.div_class = div_class
+ if "__ID__" not in script:
+ raise JavascriptScriptError(
+ "The script does not contain the string __ID__. "
+ f"It will be replaced by the ID value in the script:\n{script}"
+ )
+ self.local = local
+ self.css, self.libs = self._copy_local(css, libs, local)
+ if check_urls and not local:
+ if self.css is not None:
+ for c in self.css:
+ check_url(c)
+ if self.libs is not None:
+ for lib in self.libs:
+ if isinstance(lib, dict):
+ check_url(lib["path"])
+ else:
+ check_url(lib)
+
+ def _copy_local(self, css, libs, local):
+ """
+ If *self.local*, copies javascript dependencies in the local folder.
+
+ :param css: list of css
+ :param libs: list of libraries
+ :param local: boolean or new location
+ :return: tuple (css, libs)
+ """
+ if not self.local:
+ return css, libs
+ to_copy = []
+ if css:
+ to_copy.extend(css)
+ if libs:
+ for js in libs:
+ if isinstance(js, dict):
+ to_copy.append(js["path"])
+ else:
+ to_copy.append(js)
+
+ for js in to_copy:
+ if not os.path.exists(js):
+ raise FileNotFoundError(f"Unable to find '{js}'")
+ dest = local if isinstance(local, str) else os.getcwd()
+ shutil.copy(js, dest)
+
+ if css:
+ css = [os.path.split(c)[-1] for c in css]
+ if libs:
+
+ def proc(d):
+ "proc"
+ if isinstance(d, dict):
+ d = d.copy()
+ d["path"] = os.path.split(d["path"])[-1]
+ return d
+ else:
+ return os.path.split(d)[-1]
+
+ libs = [proc(c) for c in libs]
+ return css, libs
+
+ def generate_html(self):
+ """
+ Builds the HTML and javascript used by
+ `_ipython_display_ <https://ipython.readthedocs.io/en/stable/config/integrating.html>`_.
+
+ :return: `HTML <https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#IPython.display.HTML>`_ text,
+ `Javascript <https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#IPython.display.Javascript>`_ text
+ """
+ if self.style:
+ style = f' style="{self.style}"'
+ else:
+ style = ""
+ if self.div_class:
+ divcl = f' class="{self.div_class}"'
+ else:
+ divcl = ""
+ if self.css:
+ css = "".join(
+ f'<link rel="stylesheet" href="{c}" type="text/css" />'
+ for c in self.css
+ )
+ ht = (
+ '<div id="{uuid}-css">{css}<div{divcl} id="{uuid}"{style}></div></div>'
+ ).format(uuid=self.uuid, css=css, style=style, divcl=divcl)
+ else:
+ ht = (
+ '<div{divcl} id="{uuid}"{style}></div>'
+ ).format(uuid=self.uuid, style=style, divcl=divcl)
+
+ script = self.script.replace("__ID__", self.uuid)
+ if self.libs:
+ names = []
+ paths = []
+ shims = {}
+ args = []
+ exports = []
+ for lib in self.libs:
+ if isinstance(lib, dict):
+ name = lib.get("name", None)
+ if "path" in lib:
+ p = lib["path"]
+ if name is None:
+ name = ".".join((p.split("/")[-1]).split(".")[:-1])
+ path = ".".join(p.split(".")[:-1])
+ paths.append((name, path))
+ else:
+ raise KeyError(f"unable to find 'path' in {lib}")
+ names.append(name)
+ args.append(name)
+ if "exports" in lib:
+ if name not in shims:
+ shims[name] = {}
+ shims[name]["exports"] = lib["exports"]
+ if isinstance(lib["exports"], list):
+ exports.extend(lib["exports"])
+ else:
+ exports.append(lib["exports"])
+ if "deps" in lib:
+ if name not in shims:
+ shims[name] = {}
+ shims[name]["deps"] = lib["deps"]
+ else:
+ names.append(lib)
+ if len(names) == 0:
+ raise ValueError(
+ (
+ "names is empty.\nlibs={0}\npaths={1}"
+ "\nshims={2}\nexports={3}"
+ ).format(self.libs, paths, shims, exports)
+ )
+ require = ",".join(f"'{na}'" for na in names)
+
+ config = ["require.config({"]
+ if len(paths) > 0:
+ config.append("paths:{")
+ for name, path in paths:
+ config.append(f"'{name}':'{path}',")
+ config.append("},")
+ if len(shims) > 0:
+ config.append("shim:{")
+
+ def vd(d):
+ "vd"
+ rows = []
+ for k, v in sorted(d.items()):
+ rows.append(
+ "'{0}':{1}".format(
+ k, v if isinstance(v, list) else "'{0}'".format(v)
+ )
+ )
+ return "{%s}" % ",".join(rows)
+
+ for k, v in sorted(shims.items()):
+ config.append(f"'{k}':{vd(v)},")
+ config.append("},")
+ config.append("});")
+ if len(config) > 2:
+ prefix = "\n".join(config) + "\n"
+ else:
+ prefix = ""
+ js = prefix + """\nrequire([%s], function(%s) { %s });\n""" % (
+ require,
+ ",".join(args),
+ script,
+ )
+ else:
+ js = script
+ if self.only_html:
+ ht += f"\n<script>\n{js}\n</script>\n"
+ return ht, None
+ return ht, js
+
+
+class RenderJSObj(RenderJSRaw):
+ """
+ Renders JS using :epkg:`javascript`.
+ """
+
+ def _ipython_display_(self):
+ """
+ Overloads method
+ `_ipython_display_
+ <https://ipython.readthedocs.io/en/stable/config/integrating.html>`_.
+ """
+ if "display" not in dir(ipydisplay):
+ # Weird bug introduced in IPython 8.0.0
+ import IPython.core.display_functions
+
+ ipydisplay.display = IPython.core.display_functions.display
+ ht, js = self.generate_html()
+ if js is None:
+ display_html(ht, raw=True)
+ else:
+ display_html(ht, raw=True)
+ display_javascript(js, raw=True)
+
+
+class RenderJS(RenderJSRaw):
+ """
+ Renders :epkg:`javascript`, only outputs :epkg:`HTML`.
+ """
+
+ def _repr_html_(self):
+ """
+ Overloads method *_repr_html_*.
+ """
+ ht, js = self.generate_html()
+ if js is not None:
+ ht += f"\n<script>\n{js}\n</script>\n"
+ return ht
+
+
+class RenderJsDot(RenderJS):
+ """
+ Renders a graph in a :epkg:`notebook`
+ defined in :epkg:`DOT` language
+ with :epkg:`viz.js`. On `binder
+ <https://mybinder.org/>`_,
+ argument *local* should be set to True for it to work.
+
+ :param dot: (str) graph in :epkg:`DOT` language
+ :param local: (bool) use local path to javascript dependencies
+ :param width: (str) width
+ :param height: (str) height
+ :param style: (str) style (added as an inline ``style`` attribute on the div)
+ :param divid: (str|None) id of the div
+ :param only_html: (bool) use only function
+ `display_html <https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#IPython.display.display_html>`_
+ and not `display_javascript
+ <https://ipython.readthedocs.io/en/stable/api/generated/IPython.display.html#IPython.display.display_javascript>`_ to add
+ javascript to the page.
+ :param div_class: (str) class of the section ``div``
+ which will host the results of the javascript
+ :param check_urls: (bool) by default, check url exists
+ :param lite: (bool) use lite version
+ (no `neato <https://graphviz.org/docs/layouts/neato/>`_)
+ """
+
+ def __init__(
+ self,
+ dot,
+ local=False,
+ width="100%",
+ height="100%",
+ divid=None,
+ style=None,
+ only_html=True,
+ div_class=None,
+ check_urls=True,
+ lite=False,
+ ):
+ script = RenderJsDot._build_script(dot)
+ libs, css = RenderJsDot._get_libs_css(local, lite)
+ RenderJS.__init__(
+ self,
+ script,
+ width=width,
+ height=height,
+ divid=divid,
+ only_html=only_html,
+ div_class=div_class,
+ check_urls=check_urls,
+ libs=libs,
+ css=css,
+ local=local,
+ )
+
+ @staticmethod
+ def _get_libs_css(local, lite):
+ """
+ Returns the dependencies.
+
+ :param local: use local file (True) or remote urls (False)
+ :param lite: use lite version
+ :return: tuple *(libs, css)*
+ """
+ jsvers = "viz-lite.js" if lite else "viz.js"
+ if local:
+ this = os.path.dirname(__file__)
+ js = os.path.join(this, "..", "js", "vizjs", jsvers)
+ libs = [js]
+ else:
+ libs = ["http://www.xavierdupre.fr/js/vizjs/" + jsvers]
+ css = None
+ return libs, css
+
+ @staticmethod
+ def _build_script(dot):
+ """
+ Builds the javascript script based wrapping the
+ :epkg:`DOT` language.
+
+ :param dot: :epkg:`DOT` language
+ :return: javascript
+ """
+ dot = dot.replace('"', '\\"').replace("\n", "\\n")
+ script = f"""var svgGraph = Viz("{dot}");
+document.getElementById('__ID__').innerHTML = svgGraph;"""
+ return script
diff --git a/pyproject.toml b/pyproject.toml
index ab04ce43..22ea65e6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,15 +47,50 @@ exclude = [
"dist",
]
-# Same as Black.
line-length = 88
-[tool.ruff.mccabe]
-# Unlike Flake8, default to a complexity level of 10.
-max-complexity = 10
+[tool.ruff.lint]
+select = [
+ "B", # flake8-bugbear
+ "C4", # flake8-comprehensions
+ #"D", # pydocstyle
+ "E", # pycodestyle
+ "F", # Pyflakes
+ "G", # flake8-logging-format
+ #"I", # isort
+ "ISC", # flake8-implicit-str-concat
+ "LOG", # flake8-logging
+ #"N", # pep8-naming
+ #"NPY", # modern numpy
+ #"PERF", # Perflint
+ "PIE", # flake8-pie
+ "PYI", # flake8-pyi
+ "RUF", # Ruff-specific rules
+ "SIM", # flake8-simplify
+ "SLOT", # flake8-slot
+ "T10", # flake8-debugger
+ #"TID", # Disallow relative imports
+ #"TRY", # flake8-try-except-raise
+ "UP", # pyupgrade
+ "W", # pycodestyle
+ "YTT", # flake8-2020
+]
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
+"**" = [
+ "B905",
+ "C401", "C408", "C413",
+ "RUF012", "RUF100", "RUF010",
+ "SIM108", "SIM910", "SIM110", "SIM102", "SIM114", "SIM103",
+ "UP015", "UP027", "UP031", "UP034", "UP032", "UP006", "UP035", "UP007", "UP030", "UP038"
+]
+"_unittests/**" = ["SIM113", "RUF005", "E402"]
+"**/plot*.py" = ["B018"]
"_doc/sphinxdoc/source/conf.py" = ["F821"]
+"_doc/notebooks/dsgarden/**" = ["B007", "E402"]
+"_doc/notebooks/metric/**" = ["C400", "RUF005", "B007", "C417"]
+"_doc/notebooks/ml/**" = ["E402", "B007", "RUF005"]
+"_doc/notebooks/nlp/**" = ["RUF005", "E501", "F811", "E401", "E402"]
"mlstatpy/__init__.py" = ["E501"]
"mlstatpy/graph/__init__.py" = ["F401"]
"mlstatpy/graph/graph_distance.py" = ["E731"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 3d50d2e2..04d8189a 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,12 +1,11 @@
astroid
-autopep8
black
-black-nb
blockdiag
coverage
Cython
cytoolz
dill
+graphviz
hummingbird-ml
ijson
importlib_metadata
@@ -16,39 +15,33 @@ isort
jdcal
jupyter_sphinx
jupyter
-jyquickhelper
+jupyter-black
lifelines
-mako
matplotlib
memory_profiler
mlinsights
nbconvert
nbsphinx
notebook
-onnx_array_api
+onnx-array-api
+onnx-extended
onnxruntime>=1.12
pandas
pillow
psutil
pybind11
-pycodestyle
pydata_sphinx_theme
-pyensae>=1.2.788
pyinstrument
-pylint>=2.14.0
-pyquickhelper>=1.12
pytest
ruff
seaborn
-setuptools
snakeviz
scikit-learn>=1.1
-git+https://github.com/onnx/sklearn-onnx.git
+skl2onnx
sphinx
sphinx-gallery
sphinx-issues
git+https://github.com/sdpython/sphinx-runpython.git
-sphinxcontrib.imagesvg
stack_data
statsmodels
tqdm
diff --git a/setup.py b/setup.py
index 12f404c1..57ca3446 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,4 @@
-# -*- coding: utf-8 -*-
import os
-
from setuptools import setup
######################