Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ version = "0.1.0"
crate-type = ["cdylib"]

[dependencies]
fancy-regex = "0.17.0"
markdown = "1.0.0"
napi = { version = "3.0.0", features = ["async"] }
napi-derive = "3.0.0"
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,4 @@ Follow these steps for a foolproof way to make a new release:
3. Run `gh pr create` and accept the defaults. This will create a PR with the correct title. Submit the PR.
4. Once CI has passed and the PR has been approved, merge the PR. This will trigger a release.


> WARN: Don't run `npm publish` manually.
30 changes: 25 additions & 5 deletions __test__/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,22 +167,42 @@ test('extractAnchors: duplicate headings', (t) => {
)
})

test('extractAnchors: markdown in headings', (t) => {
t.deepEqual(extractAnchors('# My **heading**'), ['#my-**heading**'])
test('extractAnchors: bold text in headings', (t) => {
t.deepEqual(extractAnchors('# My **heading**'), ['#my-heading'])
})

test('extractAnchors: code in headings', (t) => {
t.deepEqual(extractAnchors('# My `heading`'), ['#my-heading'])
})

test('extractAnchors: math in headings', (t) => {
t.deepEqual(extractAnchors('## Gates $\\rightarrow$ quantum gates'), ['#gates-rightarrow-quantum-gates'])
t.deepEqual(
extractAnchors(
'### Template circuits for calculating matrix elements of $\\tilde{S}$ and $\\tilde{H}$ via Hadamard test',
),
['#template-circuits-for-calculating-matrix-elements-of-tildes-and-tildeh-via-hadamard-test'],
)
})

test('extractAnchors: mdx in headings', (t) => {
t.deepEqual(extractAnchors('# My <B>heading</B>`'), ['#my-heading'])
})

test('extractAnchors: forbidden characters', (t) => {
t.deepEqual(extractAnchors('## A heading with crazy punctuation.,;:!?`()"\\'), ['#a-heading-with-crazy-punctuation'])
})

test('extractAnchors: id tags', (t) => {
t.deepEqual(extractAnchors('<id="thing">'), ['#thing'])
t.deepEqual(extractAnchors('<span id="thing" />'), ['#thing'])
})

test('extractAnchors: duplicate id tags', (t) => {
t.deepEqual(extractAnchors('<id="thing">\n\n<id="thing">'), ['#thing'])
t.deepEqual(extractAnchors('<span id="thing" />\n\n<span id="thing" />'), ['#thing'])
})

test('extractAnchors: headings with links', (t) => {
t.deepEqual(extractAnchors('# My [heading](/test1) with [multiple links](/test2)'), ['#my-heading-with-multiple-links'])
t.deepEqual(extractAnchors('# My [heading](/test1) with [multiple links](/test2)'), [
'#my-heading-with-multiple-links',
])
})
105 changes: 105 additions & 0 deletions src/anchors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use markdown::mdast::{AttributeContent, AttributeValue, Heading, Node};
use std::collections::HashMap;

/// Record the anchor contributed by `node`, if any, in `anchor_occurrences`.
///
/// * Headings: the heading is converted to an anchor slug and that slug's
///   occurrence count is incremented, so repeated headings can later be
///   given numbered suffixes.
/// * MDX JSX elements (flow or text) with a literal `id` prop: the id is
///   stored with a count of exactly 1, overwriting any previous count for the
///   same key, so repeated ids collapse to a single anchor.
/// * Every other node kind is ignored.
pub fn extract_from_node(node: &Node, anchor_occurrences: &mut HashMap<String, u32>) {
    let attributes = match node {
        Node::Heading(h) => {
            // Single hash lookup: create the counter at 0 if missing, then bump it.
            *anchor_occurrences.entry(anchor_from_heading(h)).or_insert(0) += 1;
            return;
        }
        Node::MdxJsxFlowElement(el) => &el.attributes,
        Node::MdxJsxTextElement(el) => &el.attributes,
        _ => return,
    };

    if let Some(anchor) = get_id_prop(attributes) {
        // Ids are not counted; the entry is always reset to a single occurrence.
        anchor_occurrences.insert(anchor, 1);
    }
}

/// Expand per-anchor occurrence counts into the final list of `#`-prefixed
/// anchors.
///
/// An anchor counted `k` times yields `#anchor`, `#anchor-1`, ...,
/// `#anchor-{k-1}`; a count of 0 yields nothing. The order of the returned
/// vector follows the hash map's iteration order and is therefore unspecified.
pub fn deduplicate_anchors(anchor_occurrences: HashMap<String, u32>) -> Vec<String> {
    let mut anchors = Vec::new();
    for (slug, count) in anchor_occurrences {
        for index in 0..count {
            let anchor = if index == 0 {
                // The first occurrence keeps the bare slug.
                format!("#{slug}")
            } else {
                format!("#{slug}-{index}")
            };
            anchors.push(anchor);
        }
    }
    anchors
}

/// Return the value of the first `id` property in `attributes` that carries a
/// literal string value.
///
/// Non-property attributes are skipped, as are `id` properties whose value is
/// missing or is not a literal. Returns `None` when no attribute qualifies.
fn get_id_prop(attributes: &[AttributeContent]) -> Option<String> {
    attributes.iter().find_map(|attr| match attr {
        AttributeContent::Property(prop) if prop.name == "id" => match &prop.value {
            // Borrow while matching; only the string that is returned gets cloned.
            Some(AttributeValue::Literal(text)) => Some(text.clone()),
            _ => None,
        },
        _ => None,
    })
}

/// Build the anchor slug for a heading: gather the plain text of all of its
/// children in order, then convert that text with `heading_to_anchor`.
fn anchor_from_heading(heading: &Heading) -> String {
    let mut plain_text = String::with_capacity(100);
    heading
        .children
        .iter()
        .for_each(|child| get_text(child, &mut plain_text));
    heading_to_anchor(plain_text)
}

/// Append the plain text of `node` and all of its descendants to `s`.
///
/// Text is taken from `Text`, `InlineCode` and `InlineMath` nodes; every other
/// node kind contributes nothing itself. The node's children, if it has any,
/// are then visited recursively in order.
pub fn get_text(node: &Node, s: &mut String) {
    match node {
        Node::Text(text) => s.push_str(&text.value),
        Node::InlineCode(code) => s.push_str(&code.value),
        Node::InlineMath(math) => s.push_str(&math.value),
        _ => {}
    }

    // `children()` is `None` for nodes without children; flattening the option
    // makes that case an empty iteration.
    for child in node.children().into_iter().flatten() {
        get_text(child, s);
    }
}

/// Turn heading text into an anchor slug.
///
/// The text is trimmed and lower-cased; each space becomes `-`, the
/// punctuation characters listed in `STRIPPED` are removed, and every other
/// character is kept as-is.
fn heading_to_anchor(heading: String) -> String {
    /// Characters that are dropped from the slug entirely.
    const STRIPPED: [char; 14] = [
        '.', ',', ';', ':', '!', '?', '`', '\\', '(', ')', '"', '\'', '{', '}',
    ];

    let mut anchor = String::new();
    for ch in heading.trim().to_lowercase().chars() {
        if ch == ' ' {
            anchor.push('-');
        } else if !STRIPPED.contains(&ch) {
            anchor.push(ch);
        }
    }
    anchor
}
72 changes: 0 additions & 72 deletions src/anchors/mod.rs

This file was deleted.

38 changes: 29 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use std::collections::{HashMap, HashSet};

use napi::Error;
use napi_derive::napi;
use tokio::fs;

use crate::anchors::extract_anchors_from_ref;
use crate::notebook::extract_markdown_from_notebook_source;

mod anchors;
mod links;
mod mdx;
mod notebook;

fn file_read_error(path: String, reason: String) -> Error {
Expand Down Expand Up @@ -37,24 +39,42 @@ pub async fn extract_from_file(file_path: String) -> Result<Vec<Vec<String>>, Er
source
};

let anchors = extract_anchors_from_ref(&markdown);
match extract_links(markdown) {
Ok(links) => Ok(vec![links, anchors]),
Err(e) => Err(Error::from_reason(e.to_string())),
}
let ast_root = mdx::parse_mdx(markdown)?;

let mut link_set = HashSet::<&String>::default();
let mut anchor_occurrences = HashMap::<String, u32>::default();
mdx::walk_ast(&ast_root, &mut |node| {
links::extract_from_node(node, &mut link_set);
anchors::extract_from_node(node, &mut anchor_occurrences);
});

Ok(vec![
link_set.into_iter().cloned().collect(),
anchors::deduplicate_anchors(anchor_occurrences),
])
}

/// Extract anchors from a markdown string. Anchors are either:
/// * slugified headings, deduplicated if the same heading appears more than once
/// * `id` props of HTML tags. These are not deduplicated as they should be unique per file
#[napi]
pub fn extract_anchors(markdown: String) -> Vec<String> {
extract_anchors_from_ref(&markdown)
pub fn extract_anchors(markdown: String) -> Result<Vec<String>, Error> {
let ast_root = mdx::parse_mdx(markdown)?;
let mut anchor_occurrences = HashMap::<String, u32>::default();
mdx::walk_ast(&ast_root, &mut |node| {
anchors::extract_from_node(node, &mut anchor_occurrences)
});
Ok(anchors::deduplicate_anchors(anchor_occurrences))
}

/// Extract links from a markdown string. Supports GitHub-flavored markdown
/// (gfm), math, and JSX.
#[napi]
pub fn extract_links(markdown: String) -> Result<Vec<String>, Error> {
links::extract_links(markdown)
let ast_root = mdx::parse_mdx(markdown)?;
let mut links = HashSet::<&String>::default();
mdx::walk_ast(&ast_root, &mut |node| {
links::extract_from_node(node, &mut links)
});
Ok(links.into_iter().cloned().collect())
}
29 changes: 29 additions & 0 deletions src/links.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use markdown::mdast::Node;
use markdown::mdast::{AttributeContent, AttributeValue, MdxJsxTextElement};
use std::collections::HashSet;

/// Add the link target carried by `node`, if any, to `links`.
///
/// Image and link nodes contribute their `url`; MDX JSX text elements
/// contribute whatever `extract_from_jsx_text_element` finds. All other node
/// kinds are ignored. The set stores references borrowed from `node`.
pub fn extract_from_node<'a>(node: &'a Node, links: &mut HashSet<&'a String>) {
    let url = match node {
        Node::Image(img) => &img.url,
        Node::Link(link) => &link.url,
        Node::MdxJsxTextElement(el) => match extract_from_jsx_text_element(el) {
            Some(href) => href,
            None => return,
        },
        _ => return,
    };
    links.insert(url);
}

/// Return the literal value of the element's first `href` property.
///
/// Only the first property named `href` is considered; if it has no value or
/// its value is not a literal, the result is `None`.
fn extract_from_jsx_text_element(el: &MdxJsxTextElement) -> Option<&String> {
    let href = el.attributes.iter().find_map(|attr| match attr {
        AttributeContent::Property(p) if p.name == "href" => Some(p),
        _ => None,
    })?;

    match &href.value {
        Some(AttributeValue::Literal(url)) => Some(url),
        _ => None,
    }
}
Loading