Skip to content

Commit bf75e66

Browse files
authored
feat: detect typos in project names during upload (#17649)
* feat: detect typos in project names during upload

  Follows approaches described in `typoguard` paper and `typomania` project, with changes for local conditions.

* make translations

  Signed-off-by: Mike Fiedler <[email protected]>

---------

Signed-off-by: Mike Fiedler <[email protected]>
1 parent a1f73fd commit bf75e66

File tree

9 files changed

+683
-11
lines changed

9 files changed

+683
-11
lines changed

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ def get_app_config(database, nondefaults=None):
325325
"billing.api_version": "2020-08-27",
326326
"mail.backend": "warehouse.email.services.SMTPEmailSender",
327327
"helpdesk.backend": "warehouse.helpdesk.services.ConsoleHelpDeskService",
328-
"helpdesk.notification_backend": "warehouse.helpdesk.services.ConsoleHelpDeskService", # noqa: E501
328+
"helpdesk.notification_backend": "warehouse.helpdesk.services.ConsoleAdminNotificationService", # noqa: E501
329329
"files.url": "http://localhost:7000/",
330330
"archive_files.url": "http://localhost:7000/archive",
331331
"sessions.secret": "123456",

tests/functional/forklift/test_legacy.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,73 @@ def test_duplicate_file_upload_error(webtest):
210210
assert "File already exists" in resp.body.decode()
211211

212212

213+
def test_typo_check_name_upload_passes(webtest, monkeypatch):
    """
    Upload a release whose project name is a typo of a known project name.

    The upload is expected to succeed (HTTP 200) and to create the project
    and its release. Notifying the admins is part of the stated intent, but
    nothing in this test asserts on that notification.
    """
    # TODO: Replace with a better way to generate corpus
    known_names = {"wutang", "requests"}
    monkeypatch.setattr(
        "warehouse.packaging.typosnyper._TOP_PROJECT_NAMES", known_names
    )

    # The uploading user, with a verified primary email
    user = UserFactory.create(with_verified_primary_email=True, clear_pwd="password")

    # Build a macaroon carrying a RequestUser caveat for that user
    db_macaroon = MacaroonFactory.create(
        user_id=user.id,
        caveats=[caveats.RequestUser(user_id=str(user.id))],
    )
    macaroon = pymacaroons.Macaroon(
        location="localhost",
        identifier=str(db_macaroon.id),
        key=db_macaroon.key,
        version=pymacaroons.MACAROON_V2,
    )
    for caveat in db_macaroon.caveats:
        macaroon.add_first_party_caveat(caveats.serialize(caveat))

    # HTTP Basic credentials: username `__token__`, password the API token
    api_token = f"pypi-{macaroon.serialize()}"
    raw_credentials = f"__token__:{api_token}".encode()
    credentials = base64.b64encode(raw_credentials).decode("utf-8")

    # Any sdist will do as the payload; its filename/metadata don't matter here
    with open("./tests/functional/_fixtures/sampleproject-3.0.0.tar.gz", "rb") as fh:
        payload = fh.read()

    digest = "117ed88e5db073bb92969a7545745fd977ee85b7019706dd256a64058f70963d"
    form_fields = MultiDict(
        {
            ":action": "file_upload",
            "protocol_version": "1",
            "name": "wutamg",  # the typo of "wutang"
            "sha256_digest": digest,
            "filetype": "sdist",
            "metadata_version": "2.1",
            "version": "3.0.0",
        }
    )
    webtest.post(
        "/legacy/",
        headers={"Authorization": f"Basic {credentials}"},
        params=form_fields,
        # the uploaded filename carries the same typo
        upload_files=[("content", "wutamg-3.0.0.tar.gz", payload)],
        status=HTTPStatus.OK,
    )

    # The typo did not block the upload: exactly one project exists...
    projects = user.projects
    assert projects
    assert len(projects) == 1
    project = projects[0]
    assert project.name == "wutamg"

    # ...and it has exactly one release, at the uploaded version
    releases = project.releases
    assert releases
    assert len(releases) == 1
    assert releases[0].version == "3.0.0"
278+
279+
213280
def test_invalid_classifier_upload_error(webtest):
214281
user = UserFactory.create(with_verified_primary_email=True, clear_pwd="password")
215282

tests/unit/packaging/test_services.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
ProjectNameUnavailableProhibitedError,
3535
ProjectNameUnavailableSimilarError,
3636
ProjectNameUnavailableStdlibError,
37+
ProjectNameUnavailableTypoSquattingError,
3738
)
3839
from warehouse.packaging.services import (
3940
B2FileStorage,
@@ -1050,6 +1051,14 @@ def test_check_project_name_too_similar_multiple_existing(self, db_session):
10501051
or exc.value.similar_project_name == project2.name
10511052
)
10521053

1054+
def test_check_project_name_typosquatting_prohibited(self, db_session):
    """
    With `numpy` recorded as a prohibited project, checking the name
    `numpi` must raise `ProjectNameUnavailableTypoSquattingError`.
    """
    # TODO: Update this test once we have a dynamic TopN approach
    project_service = ProjectService(session=db_session)
    ProhibitedProjectFactory.create(name="numpy")

    expected_error = ProjectNameUnavailableTypoSquattingError
    with pytest.raises(expected_error):
        project_service.check_project_name("numpi")
1061+
10531062
def test_check_project_name_ok(self, db_session):
10541063
service = ProjectService(session=db_session)
10551064

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
import pytest
14+
15+
from warehouse.packaging.typosnyper import typo_check_name
16+
17+
18+
@pytest.mark.parametrize(
    ("name", "expected"),
    [
        # Names that should pass: no typo detected
        ("numpy", None),  # exact match of a known name
        ("NuMpy", None),  # same as `numpy` after canonicalization
        # Names that should be flagged, as (check, known project) pairs
        ("nuumpy", ("repeated_characters", "numpy")),
        ("reequests", ("repeated_characters", "requests")),
        ("sphnx", ("omitted_characters", "sphinx")),
        ("python-dteutil", ("omitted_characters", "python-dateutil")),
        ("pythondateutil", ("omitted_characters", "python-dateutil")),
        ("jinj2a", ("swapped_characters", "jinja2")),
        ("dateutil-python", ("swapped_words", "python-dateutil")),
        ("numpi", ("common_typos", "numpy")),
        ("requestz", ("common_typos", "requests")),
    ],
)
def test_typo_check_name(name, expected, monkeypatch):
    """Check `typo_check_name(name)` against a fixed set of known names."""
    # Pin the known entries in the _TOP_PROJECT_NAMES list
    # TODO: Replace with a better way to generate corpus
    known_names = {
        "numpy",
        "requests",
        "sphinx",
        "beautifulsoup4",
        "jinja2",
        "python-dateutil",
    }
    monkeypatch.setattr(
        "warehouse.packaging.typosnyper._TOP_PROJECT_NAMES", known_names
    )

    result = typo_check_name(name)
    assert result == expected

warehouse/locale/messages.pot

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ msgstr ""
389389
msgid "Select project"
390390
msgstr ""
391391

392-
#: warehouse/manage/forms.py:506 warehouse/oidc/forms/_core.py:29
392+
#: warehouse/manage/forms.py:506 warehouse/oidc/forms/_core.py:33
393393
#: warehouse/oidc/forms/gitlab.py:57
394394
msgid "Specify project name"
395395
msgstr ""
@@ -658,45 +658,45 @@ msgstr ""
658658
msgid "Expired invitation for '${username}' deleted."
659659
msgstr ""
660660

661-
#: warehouse/oidc/forms/_core.py:31 warehouse/oidc/forms/_core.py:42
661+
#: warehouse/oidc/forms/_core.py:35 warehouse/oidc/forms/_core.py:46
662662
#: warehouse/oidc/forms/gitlab.py:60 warehouse/oidc/forms/gitlab.py:64
663663
msgid "Invalid project name"
664664
msgstr ""
665665

666-
#: warehouse/oidc/forms/_core.py:60
666+
#: warehouse/oidc/forms/_core.py:64
667667
#, python-brace-format
668668
msgid ""
669669
"This project already exists: use the project's publishing settings <a "
670670
"href='${url}'>here</a> to create a Trusted Publisher for it."
671671
msgstr ""
672672

673-
#: warehouse/oidc/forms/_core.py:69
673+
#: warehouse/oidc/forms/_core.py:73
674674
msgid "This project already exists."
675675
msgstr ""
676676

677-
#: warehouse/oidc/forms/_core.py:74
677+
#: warehouse/oidc/forms/_core.py:78
678678
msgid "This project name isn't allowed"
679679
msgstr ""
680680

681-
#: warehouse/oidc/forms/_core.py:78
681+
#: warehouse/oidc/forms/_core.py:82
682682
msgid "This project name is too similar to an existing project"
683683
msgstr ""
684684

685-
#: warehouse/oidc/forms/_core.py:83
685+
#: warehouse/oidc/forms/_core.py:87
686686
msgid ""
687687
"This project name isn't allowed (conflict with the Python standard "
688688
"library module name)"
689689
msgstr ""
690690

691-
#: warehouse/oidc/forms/_core.py:99 warehouse/oidc/forms/_core.py:110
691+
#: warehouse/oidc/forms/_core.py:115 warehouse/oidc/forms/_core.py:126
692692
msgid "Specify a publisher ID"
693693
msgstr ""
694694

695-
#: warehouse/oidc/forms/_core.py:100 warehouse/oidc/forms/_core.py:111
695+
#: warehouse/oidc/forms/_core.py:116 warehouse/oidc/forms/_core.py:127
696696
msgid "Publisher must be specified by ID"
697697
msgstr ""
698698

699-
#: warehouse/oidc/forms/_core.py:116
699+
#: warehouse/oidc/forms/_core.py:132
700700
msgid "Specify an environment name"
701701
msgstr ""
702702

warehouse/oidc/forms/_core.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212
import markupsafe
13+
import structlog
1314
import wtforms
1415

1516
from warehouse.i18n import localize as _
@@ -19,9 +20,12 @@
1920
ProjectNameUnavailableProhibitedError,
2021
ProjectNameUnavailableSimilarError,
2122
ProjectNameUnavailableStdlibError,
23+
ProjectNameUnavailableTypoSquattingError,
2224
)
2325
from warehouse.utils.project import PROJECT_NAME_RE
2426

27+
log = structlog.get_logger()
28+
2529

2630
class PendingPublisherMixin:
2731
project_name = wtforms.StringField(
@@ -84,6 +88,18 @@ def validate_project_name(self, field):
8488
" standard library module name)"
8589
)
8690
)
91+
# TODO: Cover with testing and remove pragma
92+
except ProjectNameUnavailableTypoSquattingError as exc: # pragma: no cover
93+
# TODO: raise with an appropriate message when we're ready to implement
94+
# or combine with `ProjectNameUnavailableSimilarError`
95+
# TODO: This is an attempt at structlog, since `request.log` isn't in scope.
96+
# We should be able to use `log` instead, but doesn't have the same output
97+
log.error(
98+
"Typo-squatting error raised but not handled in form validation",
99+
check_name=exc.check_name,
100+
existing_project_name=exc.existing_project_name,
101+
)
102+
pass
87103

88104
@property
89105
def provider(self) -> str: # pragma: no cover

warehouse/packaging/interfaces.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,11 @@ class ProjectNameUnavailableSimilarError(ProjectNameUnavailableError):
130130

131131
def __init__(self, similar_project_name: str):
132132
self.similar_project_name: str = similar_project_name
133+
134+
135+
class ProjectNameUnavailableTypoSquattingError(ProjectNameUnavailableError):
    """
    Project name is a typo of an existing project.

    Carries the two values passed by the raiser so that handlers can log
    or report them.
    """

    def __init__(self, check_name: str, existing_project_name: str) -> None:
        # The existing project that the rejected name resembles.
        self.existing_project_name: str = existing_project_name
        # NOTE(review): presumably the identifier of the typo check that
        # matched (e.g. "common_typos") -- confirm against the raiser.
        self.check_name: str = check_name

0 commit comments

Comments
 (0)