Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2,286 changes: 2,286 additions & 0 deletions boneset-api/data/final_skull.json

Large diffs are not rendered by default.

76 changes: 51 additions & 25 deletions boneset-api/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
// console.log(`🚀 Server running on http://127.0.0.1:${PORT}`);
//});

// boneset-api/server.js
// boneset-api/server.js
const express = require("express");
const axios = require("axios");
Expand All @@ -103,7 +104,7 @@ const PORT = process.env.PORT || 8000;

app.use(cors());

// ---- Existing GitHub sources used only by /combined-data (unchanged for Pelvis) ----
// Raw-content base URL for the pelvis dataset in the DigitalBonesBox repo.
const GITHUB_REPO = "https://raw.githubusercontent.com/oss-slu/DigitalBonesBox/data/DataPelvis/";
// Boneset JSON listing the pelvis bones (its .bones array drives /combined-data).
const BONESET_JSON_URL = `${GITHUB_REPO}boneset/bony_pelvis.json`;
// Directory holding one <boneId>.json file per pelvis bone.
const BONES_DIR_URL = `${GITHUB_REPO}bones/`;
Expand All @@ -120,7 +121,7 @@ const bonesetLimiter = rateLimit({
});

// ---- Only allow bonesets we ship locally right now ----
// Allowlist consulted by /api/description and /api/boneset before touching disk,
// so a request can never name an arbitrary final_*.json file.
const ALLOWED_BONESETS = new Set(["bony_pelvis", "skull"]);

// ---- Helpers ----
async function fetchJSON(url) {
Expand All @@ -145,7 +146,7 @@ function safeDataPath(fileName) {
return candidate;
}

// Tiny HTML escape (double-quotes everywhere for ESLint)
// Tiny HTML escape
function escapeHtml(str = "") {
return String(str).replace(/[&<>"']/g, (c) => ({
"&": "&amp;",
Expand All @@ -156,14 +157,23 @@ function escapeHtml(str = "") {
})[c]);
}

// ---- Load local final_* boneset JSON with a small cache ----
// Keyed by boneset id so multiple bonesets (pelvis, skull, ...) can be cached
// at once; replaces the old single-file `cachedBoneset` variable.
const bonesetCache = new Map();

// Load and cache the merged `final_<bonesetId>.json` file from the data dir.
// Path traversal is prevented by safeDataPath; throws if the file is missing
// or is not valid JSON.
async function loadBoneset(bonesetId) {
  if (bonesetCache.has(bonesetId)) return bonesetCache.get(bonesetId);
  const file = safeDataPath(`final_${bonesetId}.json`);
  const raw = await fs.readFile(file, "utf8");
  const parsed = JSON.parse(raw);
  bonesetCache.set(bonesetId, parsed);
  return parsed;
}

// Read a locally-shipped merged boneset JSON (final_<id>.json) straight from
// disk on every call — no caching. `safeDataPath` guards against traversal.
async function loadLocalBoneset(id) {
  const jsonPath = safeDataPath(`final_${id}.json`);
  const contents = await fs.readFile(jsonPath, "utf8");
  return JSON.parse(contents);
}

function findNodeById(boneset, id) {
Expand All @@ -183,64 +193,80 @@ app.get("/", (_req, res) => {
res.json({ message: "Welcome to the Boneset API (GitHub-Integrated)" });
});

// Dropdown aggregation: pelvis (fetched from GitHub) + skull (local final_skull.json).
// One clear flow — build pelvis, then skull, then respond — with a single set of
// accumulator arrays (the garbled merge had duplicate `bonesets` declarations and
// fetched the pelvis boneset twice).
app.get("/combined-data", async (_req, res) => {
  try {
    const bonesets = [];
    const bones = [];
    const subbones = [];

    // --- Bony Pelvis (from GitHub) ---
    const pelvis = await fetchJSON(BONESET_JSON_URL);
    if (!pelvis) return res.status(500).json({ error: "Failed to load pelvis data" });
    bonesets.push({ id: pelvis.id, name: pelvis.name });

    for (const boneId of pelvis.bones) {
      const boneData = await fetchJSON(`${BONES_DIR_URL}${boneId}.json`);
      if (!boneData) continue; // skip bones whose JSON failed to load
      bones.push({ id: boneData.id, name: boneData.name, boneset: pelvis.id });
      for (const subBoneId of boneData.subBones || []) {
        subbones.push({ id: subBoneId, name: subBoneId.replace(/_/g, " "), bone: boneData.id });
      }
    }

    // --- Skull (from local final_skull.json) ---
    // Non-fatal: a missing/corrupt skull file still lets the pelvis data through.
    try {
      const skull = await loadBoneset("skull");
      bonesets.push({ id: skull.id, name: skull.name });
      for (const b of skull.bones || []) {
        bones.push({ id: b.id, name: b.name, boneset: skull.id });
        for (const sb of b.subbones || []) {
          subbones.push({ id: sb.id, name: sb.name, bone: b.id });
        }
      }
    } catch (e) {
      console.warn("Skull load failed:", e.message);
    }

    res.json({ bonesets, bones, subbones });
  } catch (error) {
    console.error("Error fetching combined data:", error.message);
    res.status(500).json({ error: "Internal Server Error" });
  }
});

// Serve a bone/subbone description as an HTMX <li> fragment, read from the
// *selected* local merged JSON (pelvis or skull). Input is allowlist-validated,
// so no user string ever reaches the filesystem or an outbound request (no SSRF).
// NOTE: the garbled merge appended the description lines in two identical loops,
// duplicating every <li>; this renders them exactly once.
app.get("/api/description", bonesetLimiter, async (req, res) => {
  const boneId = String(req.query.boneId || "");
  const bonesetId = String(req.query.bonesetId || "bony_pelvis");

  // Basic allowlist-style validation: ids are snake_case tokens and the
  // boneset must be one we ship locally.
  if (!/^[a-z0-9_]+$/.test(boneId) || !ALLOWED_BONESETS.has(bonesetId)) {
    return res.type("text/html").send("");
  }

  try {
    const set = await loadLocalBoneset(bonesetId);
    const node = findNodeById(set, boneId);
    if (!node) return res.type("text/html").send("");

    const name = node.name || boneId.replace(/_/g, " ");
    const lines = Array.isArray(node.description) ? node.description : [];

    // HTMX expects an <li> list fragment: bold name first, then one <li> per line.
    let html = `<li><strong>${escapeHtml(name)}</strong></li>`;
    for (const line of lines) html += `<li>${escapeHtml(line)}</li>`;
    res.type("text/html").send(html);
  } catch (err) {
    console.error("description error:", err);
    res.type("text/html").send("<li>Description not available.</li>");
  }
});

// Safe path + allowlist + rate limit


// Safe path + allowlist + rate limit to fetch the full local JSON
app.get("/api/boneset/:bonesetId", bonesetLimiter, async (req, res) => {
const { bonesetId } = req.params;

Copy link
Collaborator

@UcheWendy UcheWendy Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/api/description renders lines twice, The code appends the description lines in a loop, then immediately appends them again in a second loop. You’ll get duplicated (li) items.

Expand Down
72 changes: 52 additions & 20 deletions data_extraction/Extract_Bone_Descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,51 +2,83 @@
import json
import os

# Heuristic: the description panel lives on the right side of the slide.
# Thresholds are PowerPoint drawing offsets (presumably EMUs, 914,400 per
# inch) — TODO confirm against the source deck's slide geometry.
RIGHT_MIN_X = 8_011_000
RIGHT_MIN_Y = 3_000_000


def parse_slide_xml(xml_file, output_json_path):
    """Extract a bone name and description bullets from one slide's XML.

    Scans every shape (p:sp); for shapes positioned past RIGHT_MIN_X/Y
    (the right-hand description panel), collects their text runs. The first
    text found becomes the bone name (and derives the id); remaining bullets
    become the description. Writes {"name", "id", "description"} as JSON
    to ``output_json_path``.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    ns = {
        'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'
    }

    descriptions = []
    bone_name = "Unknown"

    for sp in root.findall(".//p:sp", ns):
        xfrm = sp.find(".//a:xfrm", ns)
        if xfrm is not None:
            pos = xfrm.find("a:off", ns)
            size = xfrm.find("a:ext", ns)

            if pos is not None and size is not None:
                x = int(pos.attrib.get("x", 0))
                y = int(pos.attrib.get("y", 0))
                width = int(size.attrib.get("cx", 0))  # kept for future use
                height = int(size.attrib.get("cy", 0))  # kept for future use

                # Range in which the descriptions are held (original heuristic)
                if x > RIGHT_MIN_X and y > RIGHT_MIN_Y:
                    text_elements = sp.findall(".//a:t", ns)

                    # Drop empty runs and the 'No Labels' placeholder,
                    # robust to case/whitespace.
                    bullet_points = [
                        t.text.strip()
                        for t in text_elements
                        if t.text and t.text.strip() and t.text.strip().lower() != 'no labels'
                    ]

                    if bullet_points:
                        if bone_name == "Unknown":
                            # Assign first extracted text as the bone name
                            bone_name = bullet_points[0]
                            # Remove name from descriptions
                            bullet_points = bullet_points[1:]
                        descriptions.extend(bullet_points)

    bone_data = {
        "name": bone_name,
        "id": bone_name.lower().replace(" ", "_"),  # Generate an ID from the name
        "description": descriptions
    }

    # Write result
    with open(output_json_path, 'w') as f:
        json.dump(bone_data, f, indent=4)

    print(f"Descriptions saved to {output_json_path}")
def process_slides(slides_folder, output_dir):
    """Batch over slide XMLs and write slideN_Descriptions.json next to your other outputs."""
    os.makedirs(output_dir, exist_ok=True)
    written = 0

    def slide_index(name):
        # Numeric slide number from 'slideN.xml'; non-numeric names sort last,
        # alphabetically. (A plain string sort would put slide10 before slide2.)
        stem = name[len("slide"):-len(".xml")]
        return (0, int(stem)) if stem.isdigit() else (1, stem)

    slide_files = [
        n for n in os.listdir(slides_folder)
        if n.startswith("slide") and n.endswith(".xml")
    ]

    # Process slide*.xml in true numeric order
    for name in sorted(slide_files, key=slide_index):
        xml_file = os.path.join(slides_folder, name)
        slide_base = os.path.splitext(name)[0]
        out_path = os.path.join(output_dir, f"{slide_base}_Descriptions.json")
        parse_slide_xml(xml_file, out_path)
        written += 1

    print(f"[ok] Wrote {written} description file(s) -> {output_dir}")

if __name__ == "__main__":
    # Batch-extract description panels for every skull slide into the
    # annotations folder.
    skull_slides_dir = "data_extraction/skull/ppt/unzipped/ppt/slides"
    skull_annotations_dir = "data_extraction/skull/annotations"
    process_slides(skull_slides_dir, skull_annotations_dir)
25 changes: 14 additions & 11 deletions data_extraction/extract_ppt_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,18 @@ def process_pptx_folders(slides_folder, rels_folder, media_folder, output_folder
else:
print(f"[WARNING] Missing relationship file: {rels_path}. Skipping {slide_file}.")


if __name__ == "__main__":
    # Skull paths (relative to repo root). The garbled merge also kept the
    # old absolute developer-machine paths; only the relative ones remain.
    slides_folder = "data_extraction/skull/ppt/unzipped/ppt/slides"
    rels_folder = "data_extraction/skull/ppt/unzipped/ppt/slides/_rels"
    media_folder = "data_extraction/skull/ppt/unzipped/ppt/media"
    output_folder = "data_extraction/skull/images"
    json_output = "data_extraction/skull/annotations"

    # Optional: directory with bonesets/bones/subbones name lists (ok if missing)
    json_directory = "data_extraction/skull/json"

    # Run the process for all slides
    process_pptx_folders(
        slides_folder, rels_folder, media_folder,
        output_folder, json_output, json_directory
    )
5 changes: 5 additions & 0 deletions data_extraction/skull/annotations/slide10_Descriptions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"name": "Labels",
"id": "labels",
"description": []
}
49 changes: 49 additions & 0 deletions data_extraction/skull/annotations/slide10_annotations.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"slide": "slide10",
"images": [
{
"rId": "rId8",
"extracted_name": "slide10_rId8.jpg"
}
],
"annotations": [
{
"text": "",
"position": {
"x": 2743200,
"y": 1299410,
"width": 5943600,
"height": 4952999
}
},
{
"text": "Anterior view ",
"position": {
"x": 5372100,
"y": 6252408,
"width": 685800,
"height": 215444
},
"link": null
},
{
"text": "",
"position": {
"x": 3444876,
"y": 2397125,
"width": 4333875,
"height": 3282950
}
},
{
"text": "Mental tubercle",
"position": {
"x": 3581400,
"y": 5739596,
"width": 838200,
"height": 215444
},
"link": null
}
]
}
19 changes: 19 additions & 0 deletions data_extraction/skull/annotations/slide11_Descriptions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "Lateral view",
"id": "lateral_view",
"description": [
"The lateral view can be divided into 3 sections:",
"Facial \u2013 anterior",
"Temporal \u2013 middle",
"Occipital \u2013 posterior",
"The temporal section is separated by the zygomatic arch into a",
"temporal fossa",
"superiorly and an infratemporal fossa inferiorly.",
"The origin of the temporalis muscle attaches to the temporal fossa",
"The",
"pterion",
"is located in the temporal fossa and marks the junction where the frontal, parietal, temporal, and sphenoid bones meet.",
"The pterion is an important landmark because it overlies the anterior branch of the middle meningeal artery.",
"Damage to this area of the skull can cause a rupture of this artery"
]
}
Loading
Loading