|
20 | 20 | from licensedcode.match import filter_overlapping_matches |
21 | 21 | from licensedcode.match import get_full_matched_text |
22 | 22 | from licensedcode.match import get_matching_regions |
| 23 | +from licensedcode.match import is_extra_words_position_valid |
23 | 24 | from licensedcode.match import LicenseMatch |
24 | 25 | from licensedcode.match import merge_matches |
25 | 26 | from licensedcode.match import reportable_tokens |
@@ -1321,6 +1322,106 @@ def test_get_matching_regions_3_lines_enough(self): |
1321 | 1322 | assert matches[5].qspan in regions[1] |
1322 | 1323 |
|
1323 | 1324 |
|
class TestExtraWordsPosition(FileBasedTesting):
    """
    Exercise ``is_extra_words_position_valid`` on matches obtained from an
    index holding a single rule. Some rule texts carry ``[[n]]`` markers;
    judging by the expectations below these presumably cap how many extra
    query words are tolerated at that spot (confirm against the rule
    documentation).
    """
    test_data_dir = TEST_DATA_DIR

    def _first_match(self, rule_text, query_text):
        """
        Build a one-rule index from ``rule_text`` and return the first match
        found for ``query_text``, with hash matching skipped.
        """
        single_rule = create_rule_from_text_and_expression(
            license_expression='extra-words',
            text=rule_text,
        )
        license_index = index.LicenseIndex([single_rule])
        found = license_index.match(
            query_string=query_text,
            _skip_hash_match=True,
        )
        return found[0]

    def test_valid_extra_words_within_limit(self):
        # Query adds three words ("of this software") where the rule has [[4]].
        marked_rule = """
            Redistribution and use [[4]] in source and binary forms are permitted.
        """
        scanned_text = """
            Redistribution and use of this software in source and binary forms are permitted.
        """
        license_match = self._first_match(marked_rule, scanned_text)
        assert is_extra_words_position_valid(license_match) is True

    def test_invalid_extra_words_exceed_limit(self):
        # Same three added words, but the rule marker is only [[2]].
        marked_rule = """
            Redistribution and use [[2]] in source and binary forms are permitted.
        """
        scanned_text = """
            Redistribution and use of this software in source and binary forms are permitted.
        """
        license_match = self._first_match(marked_rule, scanned_text)
        assert is_extra_words_position_valid(license_match) is False

    def test_no_extra_words_allowed(self):
        # The rule has no marker at all while the query adds "of software".
        plain_rule = """
            Redistribution and use in source and binary forms are permitted.
        """
        scanned_text = """
            Redistribution and use of software in source and binary forms are permitted.
        """
        license_match = self._first_match(plain_rule, scanned_text)
        assert is_extra_words_position_valid(license_match) is False

    def test_multiple_extra_spans_valid(self):
        # Two markers: [[2]] is met by "of content" and [[1]] by "again".
        marked_rule = """
            Redistribution [[2]] and use [[1]] in source and binary forms are permitted.
        """
        scanned_text = """
            Redistribution of content and use again in source and binary forms are permitted.
        """
        license_match = self._first_match(marked_rule, scanned_text)
        assert is_extra_words_position_valid(license_match) is True

    def test_extra_words_at_wrong_position(self):
        # The added words ("amazing", "great") do not sit where the rule
        # placed its [[2]] marker (right after "use").
        marked_rule = """
            Redistribution and use [[2]] in source and binary forms are permitted.
        """
        scanned_text = """
            Redistribution and amazing use in great source and binary forms are permitted.
        """
        license_match = self._first_match(marked_rule, scanned_text)
        assert is_extra_words_position_valid(license_match) is False

    def test_exact_match_without_extra_markers(self):
        # Rule and query are the same text and the rule has no marker; the
        # check is still expected to answer False.
        plain_rule = """
            Redistribution and use in source and binary forms are permitted.
        """
        scanned_text = """
            Redistribution and use in source and binary forms are permitted.
        """
        license_match = self._first_match(plain_rule, scanned_text)
        assert is_extra_words_position_valid(license_match) is False
| 1423 | + |
| 1424 | + |
1324 | 1425 | class TestLicenseMatchScore(FileBasedTesting): |
1325 | 1426 | test_data_dir = TEST_DATA_DIR |
1326 | 1427 |
|
|
0 commit comments