Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2,286 changes: 2,286 additions & 0 deletions boneset-api/data/final_skull.json

Large diffs are not rendered by default.

76 changes: 51 additions & 25 deletions boneset-api/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
// console.log(`🚀 Server running on http://127.0.0.1:${PORT}`);
//});

// boneset-api/server.js
// boneset-api/server.js
const express = require("express");
const axios = require("axios");
Expand All @@ -103,7 +104,7 @@ const PORT = process.env.PORT || 8000;

app.use(cors());

// ---- Existing GitHub sources used only by /combined-data (unchanged for Pelvis) ----
// Raw-content base URL for the pelvis dataset in the DigitalBonesBox repo.
const GITHUB_REPO = "https://raw.githubusercontent.com/oss-slu/DigitalBonesBox/data/DataPelvis/";
// Boneset JSON listing the pelvis bones (its .bones array drives /combined-data).
const BONESET_JSON_URL = `${GITHUB_REPO}boneset/bony_pelvis.json`;
// Directory holding one <boneId>.json file per pelvis bone.
const BONES_DIR_URL = `${GITHUB_REPO}bones/`;
Expand All @@ -120,7 +121,7 @@ const bonesetLimiter = rateLimit({
});

// ---- Only allow bonesets we ship locally right now ----
// Allowlist consulted by /api/description and /api/boneset before touching disk,
// so a request can never name an arbitrary final_*.json file.
const ALLOWED_BONESETS = new Set(["bony_pelvis", "skull"]);

// ---- Helpers ----
async function fetchJSON(url) {
Expand All @@ -145,7 +146,7 @@ function safeDataPath(fileName) {
return candidate;
}

// Tiny HTML escape (double-quotes everywhere for ESLint)
// Tiny HTML escape
function escapeHtml(str = "") {
return String(str).replace(/[&<>"']/g, (c) => ({
"&": "&amp;",
Expand All @@ -156,14 +157,23 @@ function escapeHtml(str = "") {
})[c]);
}

// ---- Load local final_* boneset JSON with a small cache ----
// Keyed by boneset id so multiple bonesets (pelvis, skull, ...) can be cached
// at once; replaces the old single-file `cachedBoneset` variable.
const bonesetCache = new Map();

// Load and cache the merged `final_<bonesetId>.json` file from the data dir.
// Path traversal is prevented by safeDataPath; throws if the file is missing
// or is not valid JSON.
async function loadBoneset(bonesetId) {
  if (bonesetCache.has(bonesetId)) return bonesetCache.get(bonesetId);
  const file = safeDataPath(`final_${bonesetId}.json`);
  const raw = await fs.readFile(file, "utf8");
  const parsed = JSON.parse(raw);
  bonesetCache.set(bonesetId, parsed);
  return parsed;
}

// Read a locally-shipped merged boneset JSON (final_<id>.json) straight from
// disk on every call — no caching. `safeDataPath` guards against traversal.
async function loadLocalBoneset(id) {
  const jsonPath = safeDataPath(`final_${id}.json`);
  const contents = await fs.readFile(jsonPath, "utf8");
  return JSON.parse(contents);
}

function findNodeById(boneset, id) {
Expand All @@ -183,64 +193,80 @@ app.get("/", (_req, res) => {
res.json({ message: "Welcome to the Boneset API (GitHub-Integrated)" });
});

// Dropdown aggregation: pelvis (fetched from GitHub) + skull (local final_skull.json).
// One clear flow — build pelvis, then skull, then respond — with a single set of
// accumulator arrays (the garbled merge had duplicate `bonesets` declarations and
// fetched the pelvis boneset twice).
app.get("/combined-data", async (_req, res) => {
  try {
    const bonesets = [];
    const bones = [];
    const subbones = [];

    // --- Bony Pelvis (from GitHub) ---
    const pelvis = await fetchJSON(BONESET_JSON_URL);
    if (!pelvis) return res.status(500).json({ error: "Failed to load pelvis data" });
    bonesets.push({ id: pelvis.id, name: pelvis.name });

    for (const boneId of pelvis.bones) {
      const boneData = await fetchJSON(`${BONES_DIR_URL}${boneId}.json`);
      if (!boneData) continue; // skip bones whose JSON failed to load
      bones.push({ id: boneData.id, name: boneData.name, boneset: pelvis.id });
      for (const subBoneId of boneData.subBones || []) {
        subbones.push({ id: subBoneId, name: subBoneId.replace(/_/g, " "), bone: boneData.id });
      }
    }

    // --- Skull (from local final_skull.json) ---
    // Non-fatal: a missing/corrupt skull file still lets the pelvis data through.
    try {
      const skull = await loadBoneset("skull");
      bonesets.push({ id: skull.id, name: skull.name });
      for (const b of skull.bones || []) {
        bones.push({ id: b.id, name: b.name, boneset: skull.id });
        for (const sb of b.subbones || []) {
          subbones.push({ id: sb.id, name: sb.name, bone: b.id });
        }
      }
    } catch (e) {
      console.warn("Skull load failed:", e.message);
    }

    res.json({ bonesets, bones, subbones });
  } catch (error) {
    console.error("Error fetching combined data:", error.message);
    res.status(500).json({ error: "Internal Server Error" });
  }
});

// Serve a bone/subbone description as an HTMX <li> fragment, read from the
// *selected* local merged JSON (pelvis or skull). Input is allowlist-validated,
// so no user string ever reaches the filesystem or an outbound request (no SSRF).
// NOTE: the garbled merge appended the description lines in two identical loops,
// duplicating every <li>; this renders them exactly once.
app.get("/api/description", bonesetLimiter, async (req, res) => {
  const boneId = String(req.query.boneId || "");
  const bonesetId = String(req.query.bonesetId || "bony_pelvis");

  // Basic allowlist-style validation: ids are snake_case tokens and the
  // boneset must be one we ship locally.
  if (!/^[a-z0-9_]+$/.test(boneId) || !ALLOWED_BONESETS.has(bonesetId)) {
    return res.type("text/html").send("");
  }

  try {
    const set = await loadLocalBoneset(bonesetId);
    const node = findNodeById(set, boneId);
    if (!node) return res.type("text/html").send("");

    const name = node.name || boneId.replace(/_/g, " ");
    const lines = Array.isArray(node.description) ? node.description : [];

    // HTMX expects an <li> list fragment: bold name first, then one <li> per line.
    let html = `<li><strong>${escapeHtml(name)}</strong></li>`;
    for (const line of lines) html += `<li>${escapeHtml(line)}</li>`;
    res.type("text/html").send(html);
  } catch (err) {
    console.error("description error:", err);
    res.type("text/html").send("<li>Description not available.</li>");
  }
});

// Safe path + allowlist + rate limit


// Safe path + allowlist + rate limit to fetch the full local JSON
app.get("/api/boneset/:bonesetId", bonesetLimiter, async (req, res) => {
const { bonesetId } = req.params;

Copy link
Collaborator

@UcheWendy UcheWendy Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/api/description renders lines twice, The code appends the description lines in a loop, then immediately appends them again in a second loop. You’ll get duplicated (li) items.

Expand Down
72 changes: 52 additions & 20 deletions data_extraction/Extract_Bone_Descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,51 +2,83 @@
import json
import os

# Heuristic: the description panel lives on the right side of the slide.
# Thresholds are PowerPoint drawing offsets (presumably EMUs, 914,400 per
# inch) — TODO confirm against the source deck's slide geometry.
RIGHT_MIN_X = 8_011_000
RIGHT_MIN_Y = 3_000_000


def parse_slide_xml(xml_file, output_json_path):
    """Extract a bone name and description bullets from one slide's XML.

    Scans every shape (p:sp); for shapes positioned past RIGHT_MIN_X/Y
    (the right-hand description panel), collects their text runs. The first
    text found becomes the bone name (and derives the id); remaining bullets
    become the description. Writes {"name", "id", "description"} as JSON
    to ``output_json_path``.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    ns = {
        'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'
    }

    descriptions = []
    bone_name = "Unknown"

    for sp in root.findall(".//p:sp", ns):
        xfrm = sp.find(".//a:xfrm", ns)
        if xfrm is not None:
            pos = xfrm.find("a:off", ns)
            size = xfrm.find("a:ext", ns)

            if pos is not None and size is not None:
                x = int(pos.attrib.get("x", 0))
                y = int(pos.attrib.get("y", 0))
                width = int(size.attrib.get("cx", 0))  # kept for future use
                height = int(size.attrib.get("cy", 0))  # kept for future use

                # Range in which the descriptions are held (original heuristic)
                if x > RIGHT_MIN_X and y > RIGHT_MIN_Y:
                    text_elements = sp.findall(".//a:t", ns)

                    # Drop empty runs and the 'No Labels' placeholder,
                    # robust to case/whitespace.
                    bullet_points = [
                        t.text.strip()
                        for t in text_elements
                        if t.text and t.text.strip() and t.text.strip().lower() != 'no labels'
                    ]

                    if bullet_points:
                        if bone_name == "Unknown":
                            # Assign first extracted text as the bone name
                            bone_name = bullet_points[0]
                            # Remove name from descriptions
                            bullet_points = bullet_points[1:]
                        descriptions.extend(bullet_points)

    bone_data = {
        "name": bone_name,
        "id": bone_name.lower().replace(" ", "_"),  # Generate an ID from the name
        "description": descriptions
    }

    # Write result
    with open(output_json_path, 'w') as f:
        json.dump(bone_data, f, indent=4)

    print(f"Descriptions saved to {output_json_path}")
def process_slides(slides_folder, output_dir):
    """Batch over slide XMLs and write slideN_Descriptions.json next to your other outputs."""
    os.makedirs(output_dir, exist_ok=True)
    written = 0

    def slide_index(name):
        # Numeric slide number from 'slideN.xml'; non-numeric names sort last,
        # alphabetically. (A plain string sort would put slide10 before slide2.)
        stem = name[len("slide"):-len(".xml")]
        return (0, int(stem)) if stem.isdigit() else (1, stem)

    slide_files = [
        n for n in os.listdir(slides_folder)
        if n.startswith("slide") and n.endswith(".xml")
    ]

    # Process slide*.xml in true numeric order
    for name in sorted(slide_files, key=slide_index):
        xml_file = os.path.join(slides_folder, name)
        slide_base = os.path.splitext(name)[0]
        out_path = os.path.join(output_dir, f"{slide_base}_Descriptions.json")
        parse_slide_xml(xml_file, out_path)
        written += 1

    print(f"[ok] Wrote {written} description file(s) -> {output_dir}")

if __name__ == "__main__":
    # Batch-extract description panels for every skull slide into the
    # annotations folder.
    skull_slides_dir = "data_extraction/skull/ppt/unzipped/ppt/slides"
    skull_annotations_dir = "data_extraction/skull/annotations"
    process_slides(skull_slides_dir, skull_annotations_dir)
25 changes: 14 additions & 11 deletions data_extraction/extract_ppt_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,18 @@ def process_pptx_folders(slides_folder, rels_folder, media_folder, output_folder
else:
print(f"[WARNING] Missing relationship file: {rels_path}. Skipping {slide_file}.")


if __name__ == "__main__":
    # Skull paths (relative to repo root). The garbled merge also kept the
    # old absolute developer-machine paths; only the relative ones remain.
    slides_folder = "data_extraction/skull/ppt/unzipped/ppt/slides"
    rels_folder = "data_extraction/skull/ppt/unzipped/ppt/slides/_rels"
    media_folder = "data_extraction/skull/ppt/unzipped/ppt/media"
    output_folder = "data_extraction/skull/images"
    json_output = "data_extraction/skull/annotations"

    # Optional: directory with bonesets/bones/subbones name lists (ok if missing)
    json_directory = "data_extraction/skull/json"

    # Run the process for all slides
    process_pptx_folders(
        slides_folder, rels_folder, media_folder,
        output_folder, json_output, json_directory
    )
5 changes: 5 additions & 0 deletions data_extraction/skull/annotations/slide10_Descriptions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"name": "Labels",
"id": "labels",
"description": []
}
49 changes: 49 additions & 0 deletions data_extraction/skull/annotations/slide10_annotations.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"slide": "slide10",
"images": [
{
"rId": "rId8",
"extracted_name": "slide10_rId8.jpg"
}
],
"annotations": [
{
"text": "",
"position": {
"x": 2743200,
"y": 1299410,
"width": 5943600,
"height": 4952999
}
},
{
"text": "Anterior view ",
"position": {
"x": 5372100,
"y": 6252408,
"width": 685800,
"height": 215444
},
"link": null
},
{
"text": "",
"position": {
"x": 3444876,
"y": 2397125,
"width": 4333875,
"height": 3282950
}
},
{
"text": "Mental tubercle",
"position": {
"x": 3581400,
"y": 5739596,
"width": 838200,
"height": 215444
},
"link": null
}
]
}
19 changes: 19 additions & 0 deletions data_extraction/skull/annotations/slide11_Descriptions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"name": "Lateral view",
"id": "lateral_view",
"description": [
"The lateral view can be divided into 3 sections:",
"Facial \u2013 anterior",
"Temporal \u2013 middle",
"Occipital \u2013 posterior",
"The temporal section is separated by the zygomatic arch into a",
"temporal fossa",
"superiorly and an infratemporal fossa inferiorly.",
"The origin of the temporalis muscle attaches to the temporal fossa",
"The",
"pterion",
"is located in the temporal fossa and marks the junction where the frontal, parietal, temporal, and sphenoid bones meet.",
"The pterion is an important landmark because it overlies the anterior branch of the middle meningeal artery.",
"Damage to this area of the skull can cause a rupture of this artery"
]
}
Loading
Loading