Skip to content

Commit 737d240

Browse files
committed
Improve Pypi description cleanup #2465
Description lines are sometimes padded with 8 spaces. When this happens, they need to be trimmed, and this was not being done correctly. Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent d9c03ea commit 737d240

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

src/packagedcode/pypi.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -365,15 +365,28 @@ def get_description(metainfo, location=None):
365365

366366
def clean_description(description):
367367
"""
368-
Return a cleaned description, removing extra leading whitespaces.
368+
Return a cleaned description text, removing extra leading whitespaces if
369+
needed. Some metadata formats pad each description line with 8 spaces. Some
370+
do not. We check first and cleanup if needed.
369371
"""
370-
desc_lines = []
371-
for line in (description or '').strip().splitlines(False):
372-
if line.startswith(' ' * 8):
373-
line = line[8:]
374-
desc_lines.append(line)
372+
# TODO: verify what is the impact of Description-Content-Type: if any
373+
description = description or ''
374+
description = description.strip()
375+
lines = description.splitlines(False)
375376

376-
return '\n'.join(desc_lines)
377+
space_padding = ' ' * 8
378+
379+
# we need cleaning if any of the first two lines starts with 8 spaces
380+
need_cleaning = any(l.startswith(space_padding) for l in lines[:2])
381+
if not need_cleaning:
382+
return description
383+
384+
cleaned_lines = [
385+
line[8:] if line.startswith(space_padding) else line
386+
for line in lines
387+
]
388+
389+
return '\n'.join(cleaned_lines)
377390

378391

379392
def get_legacy_description(location):

tests/packagedcode/data/pypi/unpacked_wheel/metadata-2.0/Jinja2-2.10.dist-info-expected.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"qualifiers": {},
77
"subpath": null,
88
"primary_language": "Python",
9-
"description": "A small but fast and easy to use stand-alone template engine written in pure python.\nJinja2\n~~~~~~\n\nJinja2 is a template engine written in pure Python. It provides a\n`Django`_ inspired non-XML syntax but supports inline expressions and\nan optional `sandboxed`_ environment.\n\nNutshell\n--------\n\nHere a small example of a Jinja template::\n\n {% extends 'base.html' %}\n {% block title %}Memberlist{% endblock %}\n {% block content %}\n <ul>\n {% for user in users %}\n<li><a href=\"{{ user.url }}\">{{ user.username }}</a></li>\n {% endfor %}\n </ul>\n {% endblock %}\n\nPhilosophy\n----------\n\nApplication logic is for the controller but don't try to make the life\nfor the template designer too hard by giving him too few functionality.\n\nFor more informations visit the new `Jinja2 webpage`_ and `documentation`_.\n\n.. _sandboxed: https://en.wikipedia.org/wiki/Sandbox_(computer_security)\n.. _Django: https://www.djangoproject.com/\n.. _Jinja2 webpage: http://jinja.pocoo.org/\n.. _documentation: http://jinja.pocoo.org/2/documentation/",
9+
"description": "A small but fast and easy to use stand-alone template engine written in pure python.\nJinja2\n~~~~~~\n\nJinja2 is a template engine written in pure Python. It provides a\n`Django`_ inspired non-XML syntax but supports inline expressions and\nan optional `sandboxed`_ environment.\n\nNutshell\n--------\n\nHere a small example of a Jinja template::\n\n {% extends 'base.html' %}\n {% block title %}Memberlist{% endblock %}\n {% block content %}\n <ul>\n {% for user in users %}\n <li><a href=\"{{ user.url }}\">{{ user.username }}</a></li>\n {% endfor %}\n </ul>\n {% endblock %}\n\nPhilosophy\n----------\n\nApplication logic is for the controller but don't try to make the life\nfor the template designer too hard by giving him too few functionality.\n\nFor more informations visit the new `Jinja2 webpage`_ and `documentation`_.\n\n.. _sandboxed: https://en.wikipedia.org/wiki/Sandbox_(computer_security)\n.. _Django: https://www.djangoproject.com/\n.. _Jinja2 webpage: http://jinja.pocoo.org/\n.. _documentation: http://jinja.pocoo.org/2/documentation/",
1010
"release_date": null,
1111
"parties": [
1212
{

0 commit comments

Comments
 (0)