Skip to content

Commit c8c70b0

Browse files
authored
DOP-5816: Avoid parsing txt files in reserved directories (#669)
1 parent 2398133 commit c8c70b0

File tree

3 files changed

+122
-2
lines changed

3 files changed

+122
-2
lines changed

snooty/postprocess.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
from .page import Page
7474
from .target_database import TargetDatabase
7575
from .types import Facet, ProjectConfig
76-
from .util import EXT_FOR_PAGE, SOURCE_FILE_EXTENSIONS, bundle
76+
from .util import EXT_FOR_PAGE, SOURCE_FILE_EXTENSIONS, bundle, is_txt_in_reserved_dir
7777

7878
logger = logging.getLogger(__name__)
7979
_T = TypeVar("_T")
@@ -191,6 +191,9 @@ def propagate_facets(pages: Dict[FileId, Page], context: Context) -> None:
191191
continue
192192

193193
file_path = Path(os.path.join(base, file))
194+
if is_txt_in_reserved_dir(file_path):
195+
continue
196+
194197
fileid = config.get_fileid(file_path)
195198

196199
if ext == ".ast":

snooty/test_postprocess.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4794,3 +4794,104 @@ def test_dismissible_skills_card() -> None:
47944794
"skill": "WOW Lightsaber Skill",
47954795
"url": "https://learn.mongodb.com/courses/crud-operations-in-mongodb",
47964796
}
4797+
4798+
4799+
def test_reserved_dirs() -> None:
    """Check that .txt files under a code-examples directory are not parsed as pages.

    The fixture contains .txt files directly and indirectly nested under
    ``code-examples`` directories, an .rst file under such a directory, and a
    top-level page that is merely *named* ``code-examples.txt``. Only the
    nested .txt files are expected to be absent from the resulting pages.
    """
    with make_test(
        {
            Path(
                "source/index.txt"
            ): """
========
Homepage
========

txt files inside of code-examples directories should not be parsed as reStructuredText.

.. literalinclude:: /code-examples/test.txt

.. literalinclude:: /includes/code-examples/test.txt

rst files inside of code-examples directories are okay to parse.

.. include:: /includes/code-examples/foo.rst

""",
            Path(
                "source/code-examples.txt"
            ): """
:orphan:

==================
Code Examples Page
==================

This page exists to make sure that pages titled code-examples are okay to have.

""",
            Path(
                "source/code-examples/test.txt"
            ): """
This is a code example and should not be captured as a page.
""",
            Path(
                "source/includes/code-examples/test.txt"
            ): """
This is another code example, but nested in a subdirectory, and should not be captured as a page.
""",
            Path(
                "source/includes/code-examples/foo.rst"
            ): """
This file makes sure that rst files nested in a code-examples subdirectory is okay.

.. warning::

   This is a test.

""",
        }
    ) as result:
        # txt files nested under code-examples directories should not be parsed as reStructuredText
        assert len(result.pages) == 3
        # Identity comparison is the correct way to test for None.
        assert result.pages.get(FileId("code-examples/test.txt")) is None
        assert result.pages.get(FileId("includes/code-examples/test.txt")) is None

        # Allow rst files under code-examples to be parsed
        assert result.pages[FileId("includes/code-examples/foo.rst")]

        # Allow code-examples.txt files to be parsed
        assert result.pages[FileId("code-examples.txt")]

        # The literalinclude/include directives must still resolve the
        # skipped files' contents without emitting diagnostics.
        assert len(result.diagnostics[FileId("index.txt")]) == 0
        check_ast_testing_string(
            result.pages[FileId("index.txt")].ast,
            """
<root fileid="index.txt">
    <section>
        <heading id="homepage"><text>Homepage</text></heading>
        <paragraph><text>txt files inside of code-examples directories should not be parsed as reStructuredText.</text></paragraph>
        <directive name="literalinclude">
            <text>/code-examples/test.txt</text>
            <code copyable="True">
This is a code example and should not be captured as a page.
            </code>
        </directive>
        <directive name="literalinclude">
            <text>/includes/code-examples/test.txt</text>
            <code copyable="True">
This is another code example, but nested in a subdirectory, and should not be captured as a page.
            </code>
        </directive>
        <paragraph><text>rst files inside of code-examples directories are okay to parse.</text></paragraph>
        <directive name="include">
            <text>/includes/code-examples/foo.rst</text>
            <root fileid="includes/code-examples/foo.rst">
                <paragraph><text>This file makes sure that rst files nested in a code-examples subdirectory is okay.</text></paragraph>
                <directive name="warning">
                    <paragraph><text>This is a test.</text></paragraph>
                </directive>
            </root>
        </directive>
    </section>
</root>""",
        )

snooty/util.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
PAT_URI = re.compile(r"^(?P<schema>[a-z]+)://")
6363
SOURCE_FILE_EXTENSIONS = {".txt", ".rst", ".yaml"}
6464
RST_EXTENSIONS = {".txt", ".rst"}
65+
RESERVED_DIRS = {"code-examples"}
6566
EXT_FOR_PAGE = ".txt"
6667
EMPTY_BLAKE2B = hashlib.blake2b(b"").hexdigest()
6768
SNOOTY_TOML = "snooty.toml"
@@ -175,7 +176,9 @@ def get_files(
175176

176177
path = Path(os.path.join(base, name))
177178
# Detect and ignore symlinks outside of our jail
178-
if is_relative_to(path.resolve(), must_be_relative_to):
179+
if is_relative_to(
180+
path.resolve(), must_be_relative_to
181+
) and not is_txt_in_reserved_dir(path):
179182
yield path
180183

181184

@@ -781,3 +784,16 @@ def parse_toml_and_add_line_info(text: str) -> Dict[str, Any]:
781784
raise TOMLDecodeErrorWithSourceInfo(message, text.count("\n") + 1) from err
782785

783786
raise err
787+
788+
789+
def is_txt_in_reserved_dir(path: Path) -> bool:
    """Return whether ``path`` is a ``.txt`` file nested under a reserved directory.

    The result is True only when the path's suffix is exactly ``.txt`` and at
    least one of its directory components is a member of ``RESERVED_DIRS``.
    The final component (the file name itself) is never compared against the
    reserved names.

    NOTE(review): every directory component of ``path`` as given is examined,
    so a reserved name appearing above the project root in an absolute path
    would also match -- confirm that callers are fine with this.
    """
    if path.suffix != ".txt":
        return False

    # Exclude the last component so that a file whose own name equals a
    # reserved directory name is not treated as living inside one.
    parent_components = path.parts[:-1]
    return any(component in RESERVED_DIRS for component in parent_components)

0 commit comments

Comments
 (0)