qiskit-community · frankharkins · Jan 29, 2026 · Jan 27, 2026 · Jan 27, 2026 · Jan 27, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,7 +9,6 @@ version = "0.1.0"
 crate-type = ["cdylib"]
 
 [dependencies]
-fancy-regex = "0.17.0"
 markdown    = "1.0.0"
 napi        = { version = "3.0.0", features = ["async"] }
 napi-derive = "3.0.0"

diff --git a/README.md b/README.md
@@ -49,5 +49,4 @@ Follow these steps for a foolproof way to make a new release:
 3. Run `gh pr create` and accept the defaults. This will create a PR with the correct title. Submit the PR.
 4. Once CI has passed and been approved, merge the PR. This will trigger a release.
 
-
 > WARN: Don't run `npm publish` manually.
diff --git a/__test__/index.spec.ts b/__test__/index.spec.ts
@@ -167,22 +167,42 @@ test('extractAnchors: duplicate headings', (t) => {
   )
 })
 
-test('extractAnchors: markdown in headings', (t) => {
-  t.deepEqual(extractAnchors('# My **heading**'), ['#my-**heading**'])
+test('extractAnchors: bold text in headings', (t) => {
+  t.deepEqual(extractAnchors('# My **heading**'), ['#my-heading'])
+})
+
+test('extractAnchors: code in headings', (t) => {
+  t.deepEqual(extractAnchors('# My `heading`'), ['#my-heading'])
+})
+
+test('extractAnchors: math in headings', (t) => {
+  t.deepEqual(extractAnchors('## Gates $\\rightarrow$ quantum gates'), ['#gates-rightarrow-quantum-gates'])
+  t.deepEqual(
+    extractAnchors(
+      '### Template circuits for calculating matrix elements of $\\tilde{S}$ and $\\tilde{H}$ via Hadamard test',
+    ),
+    ['#template-circuits-for-calculating-matrix-elements-of-tildes-and-tildeh-via-hadamard-test'],
+  )
+})
+
+test('extractAnchors: mdx in headings', (t) => {
+  t.deepEqual(extractAnchors('# My <B>heading</B>`'), ['#my-heading'])
 })
 
 test('extractAnchors: forbidden characters', (t) => {
   t.deepEqual(extractAnchors('## A heading with crazy punctuation.,;:!?`()"\\'), ['#a-heading-with-crazy-punctuation'])
 })
 
 test('extractAnchors: id tags', (t) => {
-  t.deepEqual(extractAnchors('<id="thing">'), ['#thing'])
+  t.deepEqual(extractAnchors('<span id="thing" />'), ['#thing'])
 })
 
 test('extractAnchors: duplicate id tags', (t) => {
-  t.deepEqual(extractAnchors('<id="thing">\n\n<id="thing">'), ['#thing'])
+  t.deepEqual(extractAnchors('<span id="thing" />\n\n<span id="thing" />'), ['#thing'])
 })
 
 test('extractAnchors: headings with links', (t) => {
-  t.deepEqual(extractAnchors('# My [heading](/test1) with [multiple links](/test2)'), ['#my-heading-with-multiple-links'])
+  t.deepEqual(extractAnchors('# My [heading](/test1) with [multiple links](/test2)'), [
+    '#my-heading-with-multiple-links',
+  ])
 })
diff --git a/src/anchors.rs b/src/anchors.rs
@@ -0,0 +1,105 @@
+use markdown::mdast::{AttributeContent, AttributeValue, Heading, Node};
+use std::collections::HashMap;
+
+/// Record the anchor defined by `node`, if any, in `anchor_occurrences`.
+///
+/// * A heading contributes the slug built by `anchor_from_heading`; every
+///   occurrence of the same slug increments its count by one.
+/// * An MDX JSX element (flow or text) with a literal `id` prop contributes
+///   that id. Ids are not counted: the first sighting stores a count of 1 and
+///   later sightings leave the stored count untouched, so an id can never
+///   erase the occurrences already recorded for a heading with the same slug.
+/// * Every other node kind is ignored.
+pub fn extract_from_node(node: &Node, anchor_occurrences: &mut HashMap<String, u32>) {
+  let attributes = match node {
+    Node::Heading(h) => {
+      // Single lookup: start at 0 for a new slug, then count this heading.
+      *anchor_occurrences.entry(anchor_from_heading(h)).or_insert(0) += 1;
+      return;
+    }
+    Node::MdxJsxFlowElement(el) => &el.attributes,
+    Node::MdxJsxTextElement(el) => &el.attributes,
+    _ => return,
+  };
+
+  if let Some(anchor) = get_id_prop(attributes) {
+    // `or_insert` (not `insert`) keeps any count a heading already stored.
+    anchor_occurrences.entry(anchor).or_insert(1);
+  }
+}
+
+/// Expand an occurrence map into the final list of `#`-prefixed anchors.
+///
+/// An anchor seen `k` times yields `#anchor`, `#anchor-1`, …, `#anchor-{k-1}`;
+/// an entry with a count of 0 yields nothing. The order of the returned
+/// vector follows the map's iteration order.
+pub fn deduplicate_anchors(anchor_occurrences: HashMap<String, u32>) -> Vec<String> {
+  let mut anchors = Vec::new();
+  for (slug, count) in anchor_occurrences {
+    for index in 0..count {
+      // The first occurrence keeps the bare slug; repeats get a numeric suffix.
+      let anchor = if index == 0 {
+        format!("#{slug}")
+      } else {
+        format!("#{slug}-{index}")
+      };
+      anchors.push(anchor);
+    }
+  }
+  anchors
+}
+
+/// Return the value of the first `id` property in `attributes` whose value is
+/// a string literal.
+///
+/// Non-property attributes, properties with other names, and `id` properties
+/// whose value is missing or not a literal are skipped. Returns `None` when no
+/// attribute qualifies.
+fn get_id_prop(attributes: &[AttributeContent]) -> Option<String> {
+  attributes.iter().find_map(|attr| match attr {
+    AttributeContent::Property(prop) if prop.name == "id" => match &prop.value {
+      // Borrow the value and clone only the literal text we return.
+      Some(AttributeValue::Literal(text)) => Some(text.clone()),
+      _ => None,
+    },
+    _ => None,
+  })
+}
+
+/// Build the anchor slug for `heading`: concatenate the plain text of all of
+/// its children (via `get_text`) and slugify it with `heading_to_anchor`.
+fn anchor_from_heading(heading: &Heading) -> String {
+  let mut plain_text = String::with_capacity(100);
+  heading
+    .children
+    .iter()
+    .for_each(|child| get_text(child, &mut plain_text));
+  heading_to_anchor(plain_text)
+}
+
+/// Append the plain text of `node` and all of its descendants to `s`.
+///
+/// Only `Text`, `InlineCode` and `InlineMath` nodes contribute their `value`;
+/// every other node contributes nothing itself but is still descended into.
+/// The node's own text is pushed before its children are visited.
+pub fn get_text(node: &Node, s: &mut String) {
+  match node {
+    Node::Text(text) => s.push_str(&text.value),
+    Node::InlineCode(code) => s.push_str(&code.value),
+    Node::InlineMath(math) => s.push_str(&math.value),
+    _ => {}
+  }
+
+  if let Some(children) = node.children() {
+    for child in children {
+      get_text(child, s);
+    }
+  }
+}
+
+/// Turn heading text into an anchor slug.
+///
+/// The text is trimmed and lowercased, each space becomes `-`, and the
+/// characters ``. , ; : ! ? ` \ ( ) " ' { }`` are dropped. Every other
+/// character is kept unchanged.
+fn heading_to_anchor(heading: String) -> String {
+  let normalized = heading.trim().to_lowercase();
+  let mut anchor = String::new();
+  for c in normalized.chars() {
+    if c == ' ' {
+      anchor.push('-');
+      continue;
+    }
+    let is_stripped = matches!(
+      c,
+      '.' | ',' | ';' | ':' | '!' | '?' | '`' | '\\' | '(' | ')' | '"' | '\'' | '{' | '}'
+    );
+    if !is_stripped {
+      anchor.push(c);
+    }
+  }
+  anchor
+}
diff --git a/src/anchors/mod.rs b/src/anchors/mod.rs
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,12 +1,14 @@
+use std::collections::{HashMap, HashSet};
+
 use napi::Error;
 use napi_derive::napi;
 use tokio::fs;
 
-use crate::anchors::extract_anchors_from_ref;
 use crate::notebook::extract_markdown_from_notebook_source;
 
 mod anchors;
 mod links;
+mod mdx;
 mod notebook;
 
 fn file_read_error(path: String, reason: String) -> Error {
@@ -37,24 +39,42 @@ pub async fn extract_from_file(file_path: String) -> Result<Vec<Vec<String>>, Er
     source
   };
 
-  let anchors = extract_anchors_from_ref(&markdown);
-  match extract_links(markdown) {
-    Ok(links) => Ok(vec![links, anchors]),
-    Err(e) => Err(Error::from_reason(e.to_string())),
-  }
+  let ast_root = mdx::parse_mdx(markdown)?;
+
+  let mut link_set = HashSet::<&String>::default();
+  let mut anchor_occurrences = HashMap::<String, u32>::default();
+  mdx::walk_ast(&ast_root, &mut |node| {
+    links::extract_from_node(node, &mut link_set);
+    anchors::extract_from_node(node, &mut anchor_occurrences);
+  });
+
+  Ok(vec![
+    link_set.into_iter().cloned().collect(),
+    anchors::deduplicate_anchors(anchor_occurrences),
+  ])
 }
 
 /// Extract anchors from a markdown string. Anchors are either:
 ///  * slugified headings, deduplicated if the same heading appears more than once
 ///  * `id` props of HTML tags. These are not deduplicated as they should be unique per file
 #[napi]
-pub fn extract_anchors(markdown: String) -> Vec<String> {
-  extract_anchors_from_ref(&markdown)
+pub fn extract_anchors(markdown: String) -> Result<Vec<String>, Error> {
+  let ast_root = mdx::parse_mdx(markdown)?;
+  let mut anchor_occurrences = HashMap::<String, u32>::default();
+  mdx::walk_ast(&ast_root, &mut |node| {
+    anchors::extract_from_node(node, &mut anchor_occurrences)
+  });
+  Ok(anchors::deduplicate_anchors(anchor_occurrences))
 }
 
 /// Extract links from a markdown string. Supports GitHub-flavored markdown
 /// (gfm), math, and JSX.
 #[napi]
 pub fn extract_links(markdown: String) -> Result<Vec<String>, Error> {
-  links::extract_links(markdown)
+  let ast_root = mdx::parse_mdx(markdown)?;
+  let mut links = HashSet::<&String>::default();
+  mdx::walk_ast(&ast_root, &mut |node| {
+    links::extract_from_node(node, &mut links)
+  });
+  Ok(links.into_iter().cloned().collect())
 }
diff --git a/src/links.rs b/src/links.rs
@@ -0,0 +1,29 @@
+use markdown::mdast::Node;
+use markdown::mdast::{AttributeContent, AttributeValue, MdxJsxTextElement};
+use std::collections::HashSet;
+
+/// Add the link target carried by `node`, if any, to `links`.
+///
+/// Images and links contribute their `url`; an MDX JSX text element
+/// contributes whatever `extract_from_jsx_text_element` finds. All other
+/// nodes are ignored. The set stores borrowed strings, so it cannot outlive
+/// the tree `node` belongs to.
+///
+/// NOTE(review): `MdxJsxFlowElement` nodes are not inspected for `href` here —
+/// confirm whether block-level JSX links are meant to be skipped.
+pub fn extract_from_node<'a>(node: &'a Node, links: &mut HashSet<&'a String>) {
+  let url = match node {
+    Node::Image(image) => &image.url,
+    Node::Link(link) => &link.url,
+    Node::MdxJsxTextElement(element) => match extract_from_jsx_text_element(element) {
+      Some(href) => href,
+      None => return,
+    },
+    _ => return,
+  };
+  links.insert(url);
+}
+
+/// Return the literal `href` value of a JSX text element, if it has one.
+///
+/// Only the first property named `href` is considered: if that property has
+/// no value, or a value that is not a string literal, the result is `None`
+/// even if a later `href` property exists.
+fn extract_from_jsx_text_element(el: &MdxJsxTextElement) -> Option<&String> {
+  let first_href = el.attributes.iter().find_map(|attr| match attr {
+    AttributeContent::Property(prop) if prop.name == "href" => Some(prop),
+    _ => None,
+  })?;
+
+  match &first_href.value {
+    Some(AttributeValue::Literal(url)) => Some(url),
+    _ => None,
+  }
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -49,5 +49,4 @@ Follow these steps for a foolproof way to make a new release:
		3. Run `gh pr create` and accept the defaults. This will create a PR with the correct title. Submit the PR.
		4. Once CI has passed and been approved, merge the PR. This will trigger a release.


		> WARN: Don't run `npm publish` manually.