Skip to content
This repository was archived by the owner on Jun 7, 2023. It is now read-only.

Commit 9413526

Browse files
authored
Merge pull request #1325 from bjones1/divid-checker
Divid checker
2 parents a08e850 + ed6656e commit 9413526

File tree

1 file changed

+34
-9
lines changed

1 file changed

+34
-9
lines changed

runestone/common/runestonedirective.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,17 @@
1919
from collections import defaultdict
2020
import binascii
2121
import os
22+
import re
2223

2324
from docutils import nodes
2425
from docutils.parsers.rst import directives
2526
from docutils.parsers.rst import Directive
2627
from docutils.utils import get_source_line
2728
from docutils.statemachine import ViewList
28-
2929
from sphinx import application
3030
from sphinx.errors import ExtensionError
31+
from sphinx.util import logging
32+
3133

3234
UNNUMBERED_DIRECTIVES = [
3335
# "activecode",
@@ -44,6 +46,8 @@
4446
"disqus",
4547
]
4648

49+
logger = logging.getLogger(__name__)
50+
4751

4852
# Provide a class which all Runestone nodes will inherit from.
4953
class RunestoneNode(nodes.Node):
@@ -203,6 +207,14 @@ def setup(app):
203207
app.add_config_value("generate_component_labels", True, "env")
204208

205209

210+
# Roughly what an XML ID allows, per the spec for an `XML Name <https://www.w3.org/TR/REC-xml/#NT-Name>`_. However:
211+
#
212+
# - This regex does allow identifiers which begin with a number, which isn't allowed by XML. I don't know of easy ways to implement an AND operation in a regex, so code which uses this regex checks this separately.
213+
# - This disallows characters that need escaping for CSS to avoid problems there, such as ``.`` and ``:``
214+
# - I'm not certain how closely Python's definition of a "word character (\w)" matches XML's definition.
215+
xml_id_regex = re.compile(r"\w[\w-]*", re.UNICODE)
216+
217+
206218
# A base class for all Runestone directives.
207219
class RunestoneDirective(Directive):
208220
option_spec = {
@@ -261,10 +273,22 @@ def __init__(self, *args, **kwargs):
261273
else:
262274
self.int_points = 1
263275

264-
self.options['optclass'] = self.options.get('class', "")
276+
self.options["optclass"] = self.options.get("class", "")
265277

266278
self.explain_text = []
267279

280+
# Check for a valid XML id. This is more restrictive than checking for a `valid HTML5 divid <https://html.spec.whatwg.org/multipage/dom.html#the-id-attribute>`_, so we don't bother with a separate HTML ID check.
281+
def validate_divid(self, divid):
    """Log an error if ``divid`` is not an acceptable XML ID.

    A divid is accepted only when it is non-empty, does not start with an
    ASCII digit, and matches ``xml_id_regex`` in its entirety. Nothing is
    raised or returned for an invalid divid; the problem is reported
    through the module ``logger`` with the directive's source location.

    :param divid: The ID string to check.
    """
    is_valid = (
        # An empty string has no first character to inspect and can never be a valid ID; reject it here instead of failing with an ``IndexError`` below.
        bool(divid)
        # XML IDs must not begin with a number, which the regex doesn't catch.
        and not ("0" <= divid[0] <= "9")
        # Use ``fullmatch`` since the entire string must match the regex for a valid id.
        and xml_id_regex.fullmatch(divid) is not None
    )
    if not is_valid:
        logger.error(
            f"Invalid divid '{divid}'.",
            location=self.state_machine.get_source_and_line(self.lineno),
        )
291+
268292

269293
# This is a base class for all Runestone directives which require a divid as their first parameter.
270294
class RunestoneIdDirective(RunestoneDirective):
@@ -318,9 +342,10 @@ def run(self):
318342
# Make sure the runestone directive at least requires an ID.
319343
assert self.required_arguments >= 1
320344
if "divid" not in self.options:
321-
id_ = self.options["divid"] = self.arguments[0]
345+
divid = self.options["divid"] = self.arguments[0]
322346
else:
323-
id_ = self.options["divid"]
347+
divid = self.options["divid"]
348+
self.validate_divid(divid)
324349

325350
self.options["qnumber"] = self.getNumber()
326351
# print(f"{id_} is number {self.options['qnumber']}")
@@ -331,19 +356,19 @@ def run(self):
331356
id_to_page = runestone_data.id_to_page
332357
page_to_id = runestone_data.page_to_id
333358
# See if this ID already exists.
334-
if id_ in id_to_page:
335-
page = id_to_page[id_]
359+
if divid in id_to_page:
360+
page = id_to_page[divid]
336361
# If it's not simply an update to an existing ID, complain.
337362
if page.docname != env.docname or page.lineno != self.lineno:
338363
raise self.error(
339364
"Duplicate ID -- see {}, line {}".format(page.docname, page.lineno)
340365
)
341366
# Make sure our data structure is consistent.
342-
assert id_ in page_to_id[page.docname]
367+
assert divid in page_to_id[page.docname]
343368
else:
344369
# Add a new entry.
345-
id_to_page[id_] = Struct(docname=env.docname, lineno=self.lineno)
346-
page_to_id[env.docname].add(id_)
370+
id_to_page[divid] = Struct(docname=env.docname, lineno=self.lineno)
371+
page_to_id[env.docname].add(divid)
347372

348373
self.in_exam = getattr(env, "in_timed", "")
349374

0 commit comments

Comments
 (0)