|
21 | 21 | from licensedcode.match import get_full_matched_text |
22 | 22 | from licensedcode.match import get_matching_regions |
23 | 23 | from licensedcode.match import is_extra_words_position_valid |
| 24 | +from licensedcode.match import is_extra_words_at_valid_positions |
24 | 25 | from licensedcode.match import LicenseMatch |
25 | 26 | from licensedcode.match import merge_matches |
26 | 27 | from licensedcode.match import reportable_tokens |
@@ -1400,9 +1401,11 @@ def test_extra_words_at_wrong_position(self): |
1400 | 1401 | idx = index.LicenseIndex([rule]) |
1401 | 1402 |
|
1402 | 1403 | query = """ |
1403 | | - Redistribution and amazing use in great source and binary forms are permitted. |
| 1404 | + Redistribution and amazing use in source and binary forms are permitted. |
1404 | 1405 | """ |
| 1406 | + # here the extra word 'amazing' is at the wrong place |
1405 | 1407 | match = idx.match(query_string=query, _skip_hash_match=True)[0] |
| 1408 | + |
1406 | 1409 | assert is_extra_words_position_valid(match) is False |
1407 | 1410 |
|
1408 | 1411 | def test_exact_match_without_extra_markers(self): |
@@ -1437,6 +1440,140 @@ def test_extra_words_one_at_right_place_and_one_at_not_right_place(self): |
1437 | 1440 | match = idx.match(query_string=query, _skip_hash_match=True)[0] |
1438 | 1441 | assert is_extra_words_position_valid(match) is False |
1439 | 1442 |
|
def test_extra_words_if_one_match_have_extra_words_at_right_place_and_another_match_have_no_extra_words(self):
    # Two rules: the first has a `[[3]]` marker between "use" and "in",
    # the second is plain text without any marker.
    marker_rule = create_rule_from_text_and_expression(
        text="Redistribution and use [[3]] in source and binary forms are permitted.",
        license_expression='extra-words',
    )
    plain_rule = create_rule_from_text_and_expression(
        text="under the MIT license",
        license_expression='mit',
    )
    license_index = index.LicenseIndex([marker_rule, plain_rule])

    # The query adds 'of', 'this', 'software' exactly where the marker sits
    # and repeats the second rule text unchanged.
    query = """
        Redistribution and use of this software in source and binary forms are permitted.
        under the MIT license
    """
    found = license_index.match(query_string=query, _skip_hash_match=True)

    assert len(found) == 2
    assert is_extra_words_at_valid_positions(found) is True
| 1467 | + |
def test_extra_words_if_one_match_have_extra_words_at_right_place_but_exceed_limit_and_another_match_have_no_extra_words(self):
    # Same pair of rules as the in-limit case: one with a `[[3]]` marker,
    # one plain rule without a marker.
    marker_rule = create_rule_from_text_and_expression(
        text="Redistribution and use [[3]] in source and binary forms are permitted.",
        license_expression='extra-words',
    )
    plain_rule = create_rule_from_text_and_expression(
        text="under the MIT license",
        license_expression='mit',
    )
    license_index = index.LicenseIndex([marker_rule, plain_rule])

    query = """
        Redistribution and use of this software AAA in source and binary forms are permitted.
        under the MIT license
    """
    found = license_index.match(query_string=query, _skip_hash_match=True)

    assert len(found) == 2

    # One match has extra words at the marked position, but there are four
    # of them ('of', 'this', 'software', 'AAA'), which exceeds the limit of
    # three, so the check must fail.
    assert is_extra_words_at_valid_positions(found) is False
| 1495 | + |
def test_extra_words_if_all_match_have_no_extra_words(self):
    # Neither rule text contains a `[[n]]` marker.
    first_rule = create_rule_from_text_and_expression(
        text="Redistribution and use in source and binary forms are permitted.",
        license_expression='extra-words',
    )
    second_rule = create_rule_from_text_and_expression(
        text="under the MIT license",
        license_expression='mit',
    )
    license_index = index.LicenseIndex([first_rule, second_rule])

    # The query repeats both rule texts word for word, so no match carries
    # any extra words.
    query = """
        Redistribution and use in source and binary forms are permitted.
        under the MIT license
    """
    found = license_index.match(query_string=query, _skip_hash_match=True)

    assert len(found) == 2

    # With no extra words in any match, the check is expected to be False.
    assert is_extra_words_at_valid_positions(found) is False
| 1521 | + |
def test_extra_words_if_one_match_have_extra_words_at_right_place_and_another_match_at_wrong_place(self):
    # Both rules carry a `[[3]]` marker.
    redistribution_rule = create_rule_from_text_and_expression(
        text="Redistribution and use [[3]] in source and binary forms are permitted.",
        license_expression='extra-words',
    )
    neither_rule = create_rule_from_text_and_expression(
        text="Neither the name of [[3]] nor the names of its",
        license_expression='extra-words2',
    )
    license_index = index.LicenseIndex([redistribution_rule, neither_rule])

    query = """
        Redistribution and use of this software in source and binary forms are permitted.
        Neither the name of William Henry James nor the names of Harris its
    """
    found = license_index.match(query_string=query, _skip_hash_match=True)

    assert len(found) == 2

    # The first match has its extra words at the marked position. The
    # second match also has extra words at the marked position, plus one
    # extra word ('Harris') at an unmarked position.
    # `is_extra_words_at_valid_positions` still returns True because one
    # of the matches has its extra words at the correct place.
    assert is_extra_words_at_valid_positions(found) is True
| 1551 | + |
def test_extra_words_all_match_have_extra_words_at_right_place(self):
    # Both rules carry a `[[3]]` marker.
    redistribution_rule = create_rule_from_text_and_expression(
        text="Redistribution and use [[3]] in source and binary forms are permitted.",
        license_expression='extra-words',
    )
    neither_rule = create_rule_from_text_and_expression(
        text="Neither the name of [[3]] nor the names of its",
        license_expression='extra-words2',
    )
    license_index = index.LicenseIndex([redistribution_rule, neither_rule])

    # Each query line adds three words exactly where its rule's marker sits:
    # 'of this software' and 'William Henry James'.
    query = """
        Redistribution and use of this software in source and binary forms are permitted.
        Neither the name of William Henry James nor the names of its
    """
    found = license_index.match(query_string=query, _skip_hash_match=True)

    assert len(found) == 2
    assert is_extra_words_at_valid_positions(found) is True
| 1576 | + |
1440 | 1577 |
|
1441 | 1578 | class TestLicenseMatchScore(FileBasedTesting): |
1442 | 1579 | test_data_dir = TEST_DATA_DIR |
|
0 commit comments