components/rsptx/build_tools/core.py (134 changes: 71 additions & 63 deletions)
@@ -36,14 +36,15 @@
from sqlalchemy.sql import text

# todo: use our logger
import logging
from rsptx.logging import rslogger
from runestone.server import get_dburl
from rsptx.db.models import Library, LibraryValidator
from rsptx.db.crud import update_source_code_sync
from rsptx.response_helpers.core import canonical_utcnow
import pdb

rslogger.setLevel("WARNING")
rslogger.setLevel(logging.DEBUG)

# Local packages
# --------------
@@ -152,7 +153,7 @@ def _build_ptx_book(config, gen, manifest, course, click=click, target="runeston

if not os.path.exists("project.ptx"):
click.echo("PreTeXt books need a project.ptx file")
return {"completed": False, "status": "Missing project.ptx file"}
return {"completed": False, "status": "Missing project.ptx file"}
else:
click.echo("Checking files")
if not target:
@@ -161,7 +162,7 @@ def _build_ptx_book(config, gen, manifest, course, click=click, target="runeston
# and {"host-platform": "runestone"} in stringparams
rs = check_project_ptx(click=click, course=course, target=target)
if not rs:
return {"completed": False, "status": "Bad configuration in project.ptx"}
return {"completed": False, "status": "Bad configuration in project.ptx"}

logger = logging.getLogger("ptxlogger")
string_io_handler = StringIOHandler()
@@ -196,7 +197,10 @@ def _build_ptx_book(config, gen, manifest, course, click=click, target="runeston
res = copytree(rs.output_dir_abspath(), book_path, dirs_exist_ok=True)
if not res:
click.echo("Error copying files to published")
return {"completed": False, "status": "Error copying files to published"}
return {
"completed": False,
"status": "Error copying files to published",
}
else:
click.echo("No need to copy files to published")
click.echo("Book deployed successfully")
@@ -222,12 +226,10 @@ def _build_ptx_book(config, gen, manifest, course, click=click, target="runeston
or "Traceback" in log_string
or "compilation failed" in log_string
):
click.echo(
"Nonfatal errors in build, check the log for details"
)
click.echo("Nonfatal errors in build, check the log for details")
return {"completed": True, "status": "Nonfatal errors in build"}
click.echo("Build completed successfully")
return {"completed": True, "status": "Build completed successfully"}
return {"completed": True, "status": "Build completed successfully"}


# Support Functions
@@ -328,12 +330,12 @@ def extract_docinfo(tree, string, attr=None, click=click):
"""
authstr = ""
if string == "author":
el = tree.findall(f"./{string}")
el = tree.xpath(f"//{string}")
for a in el:
authstr += ET.tostring(a, encoding="unicode", method="text").strip() + ", "
authstr += a.text.strip() + ", "
authstr = authstr[:-2]
return authstr
el = tree.find(f"./{string}")
el = tree.xpath(f".//{string}")[0]
if attr is not None and el is not None:
print(f"{el.attrib[attr]=}")
return el.attrib[attr].strip()
@@ -360,8 +362,10 @@ def update_library(
# This is a bit of a hack for now... todo: continue to refactor these to use crud functions
eng = create_engine(config.dburl.replace("+asyncpg", ""))
if build_system == "PTX":
tree = ET.parse(mpath)
docinfo = tree.find("./library-metadata")
parser = ET.HTMLParser(encoding="utf-8")
tree = ET.parse(mpath, parser)
docinfo_list = tree.xpath("//library-metadata")
docinfo = docinfo_list[0] if docinfo_list else None
title = extract_docinfo(docinfo, "title")
subtitle = extract_docinfo(docinfo, "subtitle")
description = extract_docinfo(docinfo, "blurb")
@@ -576,8 +580,10 @@ def _initialize_db_context(engine, sess, course_name, manifest_path):
assignment_questions = Table("assignment_questions", meta, autoload_with=engine)

# Get the author name from the manifest
tree = ET.parse(manifest_path)
docinfo = tree.find("./library-metadata")
parser = ET.HTMLParser(encoding="utf-8")
tree = ET.parse(manifest_path, parser)
docinfo_list = tree.xpath("//library-metadata")
docinfo = docinfo_list[0] if docinfo_list else None
author = extract_docinfo(docinfo, "author")
res = sess.execute(book_author.select().where(book_author.c.book == course_name))
book_author_data = res.first()
@@ -632,11 +638,12 @@ def _process_chapters(sess, db_context, course_name, manifest_path):
"""Process all chapters from the manifest."""
rslogger.info("Populating the database with Chapter information")

tree = ET.parse(manifest_path)
parser = ET.HTMLParser(encoding="utf-8")
tree = ET.parse(manifest_path, parser)
root = tree.getroot()
chap = 0

for chapter in root.findall("./chapter"):
for chapter in root.xpath("//chapter"):
chap += 1
chapid = _process_single_chapter(sess, db_context, chapter, chap, course_name)
_process_subchapters(sess, db_context, chapter, chapid, course_name)
@@ -646,7 +653,8 @@ def _process_appendices(sess, db_context, course_name, manifest_path):
"""Process all appendices from the manifest."""
rslogger.info("Populating the database with Appendix information")

tree = ET.parse(manifest_path)
parser = ET.HTMLParser(encoding="utf-8")
tree = ET.parse(manifest_path, parser)
root = tree.getroot()

for appendix in root.findall("./appendix"):
@@ -659,20 +667,19 @@

def _process_single_chapter(sess, db_context, chapter, chap_num, course_name):
"""Process a single chapter and return its database ID."""
cnum = chapter.find("./number").text
cnum = chapter.xpath(".//number")[0].text
if not cnum:
cnum = ""
rslogger.debug(
f"{chapter.tag} {chapter.find('./id').text} {chapter.find('./title').text}"
rslogger.info(
f"{chapter.tag} {chapter.xpath('.//id')[0].text} {chapter.xpath('.//title')[0].text}"
)

ins = (
db_context["chapters"]
.insert()
.values(
chapter_name=f"{cnum} {chapter.find('./title').text}",
chapter_name=f"{cnum} {chapter.xpath('.//title')[0].text}",
course_id=course_name,
chapter_label=chapter.find("./id").text,
chapter_label=chapter.xpath(".//id")[0].text,
chapter_num=chap_num,
)
)
@@ -684,7 +691,7 @@ def _process_subchapters(sess, db_context, chapter, chapid, course_name):
"""Process all subchapters for a given chapter."""
subchap = 0

for subchapter in chapter.findall("./subchapter"):
for subchapter in chapter.xpath(".//subchapter"):
# check if this subchapter has a time-limit attribute
if "data-time" in subchapter.attrib:
_process_single_timed_assignment(
@@ -694,7 +701,7 @@ def _process_subchapters(sess, db_context, chapter, chapid, course_name):
# look for a subsubchapter with a time-limit attribute
# at this point (7/28/2025) the only reason for a subsubchapter
# is to have a timed assignment, so we can skip the rest of the
for subsubchapter in subchapter.findall("./subsubchapter"):
for subsubchapter in subchapter.xpath(".//subsubchapter"):
if "data-time" in subsubchapter.attrib:
_process_single_timed_assignment(
sess,
@@ -715,22 +722,20 @@ def _process_single_subchapter(
sess, db_context, chapter, subchapter, chapid, subchap_num, course_name
):
"""Process a single subchapter and its contents."""
scnum = subchapter.find("./number").text
scnum = subchapter.xpath(".//number")[0].text
if not scnum:
scnum = ""
chap_xmlid = subchapter.find("./id").text
rslogger.debug(f"subchapter {chap_xmlid}")
chap_xmlid = subchapter.xpath(".//id")[0].text
rslogger.info(f"subchapter {chap_xmlid}")

if not chap_xmlid:
rslogger.error(f"Missing id tag in subchapter {subchapter}")

# Build subchapter title
titletext = subchapter.find("./title").text
titletext = subchapter.xpath(".//title")[0].text
if not titletext:
rslogger.debug(f"constructing title for subchapter {chap_xmlid}")
titletext = " ".join(
[ET.tostring(y).decode("utf8") for y in subchapter.findall("./title/*")]
)
rslogger.info(f"constructing title for subchapter {chap_xmlid}")
titletext = " ".join(subchapter.xpath(".//title")[0].itertext())
titletext = scnum + " " + titletext.strip()

# Insert subchapter
@@ -740,7 +745,7 @@ def _process_single_subchapter(
.values(
sub_chapter_name=titletext,
chapter_id=chapid,
sub_chapter_label=subchapter.find("./id").text,
sub_chapter_label=subchapter.xpath(".//id")[0].text,
skipreading="F",
sub_chapter_num=subchap_num,
)
@@ -853,10 +858,10 @@ def _process_single_timed_assignment(
):
"""Process a timed assignment subchapter."""
rslogger.info("Processing timed assignment subchapter")
titletext = subchapter.find("./title").text.strip()
titletext = subchapter.xpath(".//title")[0].text.strip()
if not titletext:
titletext = "Timed Assignment"
timed_id = subchapter.find("./id").text
timed_id = subchapter.xpath(".//id")[0].text
time_limit = subchapter.attrib.get("data-time", "0")
# no-result, no-feedback, no-pause
show_feedback = "F" if subchapter.attrib.get("data-no-feedback", "") else "T"
@@ -883,13 +888,14 @@ def _process_single_timed_assignment(

# Now search for questions in this subchapter
qnum = 0
for question in subchapter.findall("./question"):
for question in subchapter.xpath(".//question"):
qnum += 1
# Extract question content
dbtext = " ".join(
[ET.tostring(y).decode("utf8") for y in question.findall("./htmlsrc/*")]
htmlsrc = question.xpath(".//htmlsrc")[0]
dbtext = "".join(
ET.tostring(child, encoding="utf-8", method="html").decode("utf-8") for child in htmlsrc
)
qlabel = " ".join([y.text for y in question.findall("./label")])
qlabel = " ".join(question.xpath(".//label")[0].itertext())

# Get question element and metadata
el, idchild, old_ww_id, qtype = _extract_question_metadata(question, dbtext)
@@ -900,9 +906,9 @@ def _process_single_timed_assignment(

# Build question data
if parent is not None:
subchap_label = parent.find("./id").text
subchap_label = parent.xpath(".//id")[0].text
else:
subchap_label = subchapter.find("./id").text
subchap_label = subchapter.xpath(".//id")[0].text
valudict = dict(
base_course=course_name,
name=idchild,
@@ -912,9 +918,9 @@ def _process_single_timed_assignment(
htmlsrc=dbtext,
autograde=_determine_autograde(dbtext),
from_source="T",
chapter=chapter.find("./id").text,
chapter=chapter.xpath(".//id")[0].text,
subchapter=subchap_label,
topic=f"{chapter.find('./id').text}/{subchapter.find('./id').text}",
topic=f"{chapter.xpath('.//id')[0].text}/{subchapter.xpath('.//id')[0].text}",
qnumber=qlabel,
optional="F",
practice="F",
@@ -931,7 +937,7 @@ def _process_single_timed_assignment(

def _add_page_question(sess, db_context, chapter, subchapter, course_name):
"""Add a page entry to the questions table for this chapter/subchapter."""
name = f"{chapter.find('./title').text}/{subchapter.find('./title').text}"
name = f"{chapter.xpath('.//title')[0].text}/{subchapter.xpath('.//title')[0].text}"

res = sess.execute(
text(
@@ -946,8 +952,8 @@ def _add_page_question(sess, db_context, chapter, subchapter, course_name):
timestamp=datetime.datetime.now(),
is_private="F",
question_type="page",
subchapter=subchapter.find("./id").text,
chapter=chapter.find("./id").text,
subchapter=subchapter.xpath(".//id")[0].text,
chapter=chapter.xpath(".//id")[0].text,
from_source="T",
author=db_context["author"],
owner=db_context["owner"],
@@ -973,7 +979,7 @@ def _add_page_question(sess, db_context, chapter, subchapter, course_name):

def _process_questions(sess, db_context, chapter, subchapter, course_name):
"""Process all questions in a subchapter."""
for question in subchapter.findall("./question"):
for question in subchapter.xpath(".//question"):
_process_single_question(
sess, db_context, chapter, subchapter, question, course_name
)
@@ -984,14 +990,15 @@ def _process_single_question(
):
"""Process a single question element."""
# Extract question content
dbtext = " ".join(
[ET.tostring(y).decode("utf8") for y in question.findall("./htmlsrc/*")]
htmlsrc = question.xpath(".//htmlsrc")[0]
#
dbtext = "".join(
ET.tostring(child, encoding="utf-8", method="html").decode("utf-8") for child in htmlsrc
)
qlabel = " ".join([y.text for y in question.findall("./label")])

qlabel = " ".join(question.xpath(".//label")[0].itertext())
print(f"dbtext = {dbtext}")
# Get question element and metadata
el, idchild, old_ww_id, qtype = _extract_question_metadata(question, dbtext)

# Handle webwork case where we need to update dbtext
if qtype == "webwork" and el is not None:
dbtext = ET.tostring(el).decode("utf8")
@@ -1005,8 +1012,8 @@ def _process_single_question(
dbtext = _fix_image_urls(dbtext, db_context, course_name)

# Build question data
sbc = subchapter.find("./id").text
cpt = chapter.find("./id").text
sbc = subchapter.xpath(".//id")[0].text
cpt = chapter.xpath(".//id")[0].text
valudict = dict(
base_course=course_name,
name=idchild,
@@ -1037,15 +1044,15 @@

def _extract_question_metadata(question, dbtext):
"""Extract metadata from a question element."""
el = question.find(".//*[@data-component]")
el = question.xpath(".//*[@data-component]")[0]
old_ww_id = None

if el is not None:
idchild = el.attrib.get("id", "fix_me")
if "the-id-on-the-webwork" in el.attrib:
old_ww_id = el.attrib["the-id-on-the-webwork"]
else:
el = question.find("./div")
el = question.xpath(".//div")[0]
if el is None:
idchild = "fix_me"
rslogger.error(
@@ -1058,7 +1065,7 @@
try:
qtype = el.attrib["data-component"]
if qtype == "codelens":
id_el = el.find("./*[@class='pytutorVisualizer']")
id_el = el.xpath(".//*[@class='pytutorVisualizer']")[0]
idchild = id_el.attrib["id"]
qtype = QT_MAP.get(qtype, qtype)
except Exception:
@@ -1165,7 +1172,7 @@ def _handle_datafile(el, course_name):

def _process_source_elements(sess, subchapter, course_name):
"""Process source elements in a subchapter."""
for sourceEl in subchapter.findall("./source"):
for sourceEl in subchapter.xpath(".//source"):
id = sourceEl.attrib["id"]
file_contents = sourceEl.text
filename = sourceEl.attrib.get("filename", sourceEl.attrib["id"])
@@ -1180,13 +1187,14 @@

def _set_course_attributes(sess, db_context, course_name, manifest_path):
"""Set course attributes from the manifest."""
tree = ET.parse(manifest_path)
parser = ET.HTMLParser(encoding="utf-8")
tree = ET.parse(manifest_path, parser)
root = tree.getroot()

latex = root.find("./latex-macros")
latex = root.xpath(".//latex-macros")[0]
rslogger.info("Setting attributes for this base course")

ww_meta = root.find("./webwork-version")
ww_meta = root.xpath(".//webwork-version")[0]
if ww_meta is not None:
ww_major = ww_meta.attrib["major"]
ww_minor = ww_meta.attrib["minor"]
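
A note on the recurring pattern in the hunks above: the edited functions move from ElementTree-style find/findall calls to lxml xpath() lookups on a manifest parsed with ET.HTMLParser. The sketch below is a minimal, standalone illustration of that pattern, not code from this PR; it assumes ET is lxml.etree (which the xpath/HTMLParser usage implies), and the manifest path and missing-element guard are invented for the example.

from lxml import etree as ET

# Parse the manifest leniently; lxml's HTMLParser tolerates imperfect markup.
parser = ET.HTMLParser(encoding="utf-8")
tree = ET.parse("runestone-manifest.xml", parser)  # illustrative path

# xpath() always returns a list, so guard for missing elements explicitly
# rather than relying on find() returning None.
matches = tree.xpath("//library-metadata")
docinfo = matches[0] if matches else None
if docinfo is not None:
    titles = docinfo.xpath(".//title")
    title = titles[0].text.strip() if titles and titles[0].text else ""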
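
The question-processing hunks also change how the children of an <htmlsrc> element are flattened into the dbtext string, serializing each child with method="html". The sketch below illustrates that serialization on an invented fragment; for brevity the fragment is parsed as plain XML here, whereas the build code parses the whole manifest with the HTML parser shown above.

from lxml import etree as ET

# Invented fragment for illustration only; the real markup comes from the manifest.
doc = ET.fromstring(
    "<question><htmlsrc><div id='q1'><p>What is 2 + 2?</p></div></htmlsrc></question>"
)

htmlsrc = doc.xpath(".//htmlsrc")[0]
# Serialize each child element back to HTML text, preserving tags and attributes.
dbtext = "".join(
    ET.tostring(child, encoding="utf-8", method="html").decode("utf-8")
    for child in htmlsrc
)
# dbtext is now e.g. '<div id="q1"><p>What is 2 + 2?</p></div>'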