Skip to content

Commit fc5f639

Browse files
authored
Remove the dependency on scipy #1754 (#1977)
Signed-off-by: tdruez <[email protected]>
1 parent 2741b14 commit fc5f639

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ dependencies = [
100100
"aboutcode.hashid==0.2.0",
101101
# AboutCode pipeline
102102
"aboutcode.pipeline==0.2.1",
103-
"scipy==1.15.3", # 1.16.x requires Python >=3.11
104103
# ScoreCode
105104
"scorecode==0.0.4",
106105
# Workaround issue https://github.com/aboutcode-org/scancode.io/issues/1885

scanpipe/pipes/symbolmap.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,11 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23+
import math
2324
from collections import Counter
2425
from dataclasses import dataclass
2526
from dataclasses import field
2627

27-
from scipy.spatial.distance import jensenshannon
28-
2928
from aboutcode.pipeline import LoopProgress
3029
from scanpipe.models import CodebaseRelation
3130
from scanpipe.pipes import flag
@@ -292,6 +291,21 @@ def get_symbols_probability_distribution(symbols, unique_symbols):
292291
return probability_dist
293292

294293

294+
def jensenshannon(p, q):
    """
    Compute the Jensen-Shannon distance between two probability distributions.

    ``p`` and ``q`` are iterables of probabilities paired by position; pairing
    stops at the shorter of the two. The natural logarithm is used, so for
    normalized inputs the result lies between 0.0 (identical distributions)
    and ``sqrt(ln 2)`` (distributions with no overlap). The inputs are not
    normalized here.

    Return the distance as a float.
    """
    left = 0.0
    right = 0.0
    # Single pass over the pairs so that one-shot iterators are also supported.
    for pi, qi in zip(p, q):
        mi = (pi + qi) / 2.0
        # Zero-probability entries contribute nothing (0 * log(0) is taken as 0).
        if pi > 0:
            left += pi * math.log(pi / mi)
        if qi > 0:
            right += qi * math.log(qi / mi)

    # Rounding errors can leave the sum marginally below zero for nearly
    # identical distributions; clamp it so math.sqrt does not raise ValueError.
    divergence = max((left + right) / 2.0, 0.0)
    return math.sqrt(divergence)
307+
308+
295309
def get_similarity_between_source_and_deployed_symbols(
296310
source_symbols,
297311
deployed_symbols,

scanpipe/tests/pipes/test_symbolmap.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,28 @@ def test_scanpipe_pipes_symbolmap_get_symbols_probability_distribution(self):
277277
# print(result_prob_dist)
278278
self.assertListEqual(result_prob_dist, expected_prob_dist)
279279

280+
def test_jensenshannon(self):
    """Check jensenshannon() on identical, disjoint, and overlapping inputs."""
    js_distance = symbolmap.jensenshannon

    # Same one-hot distribution on both sides: the distance is exactly 0.
    one_hot = [1.0, 0.0, 0.0]
    self.assertEqual(js_distance(one_hot, list(one_hot)), 0.0)

    # No shared mass between the two sides: the distance is at its maximum.
    disjoint = js_distance([1.0, 0.0], [0.0, 1.0])
    self.assertAlmostEqual(disjoint, 0.8325546, places=5)

    # Half of the mass is shared.
    partial = js_distance([1.0, 0.0], [0.5, 0.5])
    self.assertAlmostEqual(partial, 0.46450140402245893, places=5)

    # Same uniform distribution on both sides: the distance is exactly 0.
    uniform = [0.25, 0.25, 0.25, 0.25]
    self.assertEqual(js_distance(uniform, list(uniform)), 0.0)
301+
280302
def test_get_similarity_between_source_and_deployed_symbols(
281303
self,
282304
):

0 commit comments

Comments
 (0)