Skip to content

Commit b898dae

Browse files
authored
Add parseAnchors function (#5)
* Add `parseAnchors` function * clippy
1 parent 943539c commit b898dae

File tree

6 files changed

+115
-3
lines changed

6 files changed

+115
-3
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ version = "0.1.0"
99
crate-type = ["cdylib"]
1010

1111
[dependencies]
12+
fancy-regex = "0.17.0"
1213
markdown = "1.0.0"
1314
napi = { version = "3.0.0", features = ["async"] }
1415
napi-derive = "3.0.0"

__test__/index.spec.ts

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import test from 'ava'
22

3-
import { extractLinks, extractLinksFromFile } from '../index'
3+
import { extractLinks, extractAnchors, extractLinksFromFile } from '../index'
44

55
const dedent = (s: string) => s.replace('\n ', '')
66

@@ -154,3 +154,34 @@ test('extractLinksFromFile: invalid notebook (bad schema)', async (t) => {
154154
'Could not read "__test__/fixtures/invalid-notebook-schema.ipynb": missing field `source` at line 10 column 5',
155155
)
156156
})
157+
158+
// Table-driven cases for `extractAnchors`: each entry becomes one ava test
// titled `extractAnchors: <title>`. Set `anyOrder` when the case compares
// the result and the expectation after sorting both.
const extractAnchorsCases: Array<{
  title: string
  markdown: string
  expected: string[]
  anyOrder?: boolean
}> = [
  { title: 'no anchors', markdown: '', expected: [] },
  { title: 'simple heading', markdown: '# My heading', expected: ['#my-heading'] },
  {
    title: 'duplicate headings',
    markdown: '# My heading\n\n## My heading\n\n### My heading',
    expected: ['#my-heading', '#my-heading-1', '#my-heading-2'],
    anyOrder: true,
  },
  { title: 'markdown in headings', markdown: '# My **heading**', expected: ['#my-**heading**'] },
  {
    title: 'forbidden characters',
    markdown: '## A heading with crazy punctuation.,;:!?`\()"\\',
    expected: ['#a-heading-with-crazy-punctuation'],
  },
  { title: 'id tags', markdown: '<id="thing">', expected: ['#thing'] },
  { title: 'duplicate id tags', markdown: '<id="thing">\n\n<id="thing">', expected: ['#thing'] },
]

for (const { title, markdown, expected, anyOrder } of extractAnchorsCases) {
  test(`extractAnchors: ${title}`, (t) => {
    const actual = extractAnchors(markdown)
    if (anyOrder) {
      t.deepEqual(actual.sort(), expected.sort())
    } else {
      t.deepEqual(actual, expected)
    }
  })
}

index.d.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
/* auto-generated by NAPI-RS */
22
/* eslint-disable */
3+
/**
4+
* Extract anchors from a markdown string. Anchors are either:
5+
* * slugified headings, deduplicated if the same heading appears more than once
6+
* * `id` props of HTML tags. These are not deduplicated as they should be unique per file
7+
*/
8+
export declare function extractAnchors(markdown: string): Array<string>
9+
310
/**
411
* Extract links from a markdown string. Supports GitHub-flavored markdown
512
* (gfm), math, and JSX.

index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,5 +572,6 @@ if (!nativeBinding) {
572572
}
573573

574574
module.exports = nativeBinding
575+
module.exports.extractAnchors = nativeBinding.extractAnchors
575576
module.exports.extractLinks = nativeBinding.extractLinks
576577
module.exports.extractLinksFromFile = nativeBinding.extractLinksFromFile

src/anchors/mod.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use fancy_regex::Regex;
2+
use std::collections::HashMap;
3+
4+
pub fn extract_anchors_from_ref(markdown: &str) -> Vec<String> {
5+
let heading_regex = Regex::new("^\\s*#{1,6}\\s+(.+?)\\s*$").unwrap();
6+
let id_regex = Regex::new("(?<=id=\")(.+?)(?=\")").unwrap();
7+
8+
let mut anchor_occurrences = HashMap::<String, u32>::default();
9+
for line in markdown.split("\n") {
10+
if let Some(heading) = get_first_capture(line, &heading_regex) {
11+
let anchor = heading_to_anchor(heading);
12+
let existing_duplicates = anchor_occurrences.get(&anchor).unwrap_or(&0);
13+
anchor_occurrences.insert(anchor, *existing_duplicates + 1);
14+
}
15+
if let Some(id) = get_first_capture(line, &id_regex) {
16+
if !anchor_occurrences.contains_key(id) {
17+
anchor_occurrences.insert(id.to_string(), 1);
18+
}
19+
}
20+
}
21+
22+
anchor_occurrences
23+
.into_iter()
24+
.flat_map(|(anchor, duplications)| {
25+
(0..duplications).map(move |n| match n {
26+
0 => format!("#{anchor}"),
27+
n => format!("#{anchor}-{n}"),
28+
})
29+
})
30+
.collect()
31+
}
32+
33+
/// Turn the text of a heading into its anchor slug (without the leading `#`).
///
/// The heading is trimmed and lower-cased, every space becomes `-`, and the
/// punctuation characters listed in `DROPPED` are removed. All other
/// characters (including markdown markers such as `*`) are kept unchanged.
fn heading_to_anchor(heading: &str) -> String {
    // Punctuation that never appears in an anchor.
    const DROPPED: [char; 12] = [
        '.', ',', ';', ':', '!', '?', '`', '\\', '(', ')', '"', '\'',
    ];

    let lowered = heading.trim().to_lowercase();
    let mut anchor = String::with_capacity(lowered.len());
    for c in lowered.chars() {
        if c == ' ' {
            anchor.push('-');
        } else if !DROPPED.contains(&c) {
            anchor.push(c);
        }
    }
    anchor
}
56+
57+
fn get_first_capture<'a>(s: &'a str, r: &Regex) -> Option<&'a str> {
58+
let Ok(Some(captures)) = r.captures(s) else {
59+
return None;
60+
};
61+
Some(captures.get(1)?.as_str())
62+
}

src/lib.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#![deny(clippy::all)]
2-
31
use markdown::mdast::{AttributeContent, AttributeValue, MdxJsxTextElement};
42
use markdown::{mdast::Node, to_mdast, Constructs, ParseOptions};
53
use napi::Error;
@@ -36,6 +34,18 @@ pub async fn extract_links_from_file(file_path: String) -> Result<Vec<String>, E
3634
extract_links(markdown)
3735
}
3836

37+
use crate::anchors::extract_anchors_from_ref;
38+
39+
mod anchors;
40+
41+
/// Extract anchors from a markdown string. Anchors are either:
42+
/// * slugified headings, deduplicated if the same heading appears more than once
43+
/// * `id` props of HTML tags. These are not deduplicated as they should be unique per file
44+
#[napi]
45+
pub fn extract_anchors(markdown: String) -> Vec<String> {
46+
extract_anchors_from_ref(&markdown)
47+
}
48+
3949
/// Extract links from a markdown string. Supports GitHub-flavored markdown
4050
/// (gfm), math, and JSX.
4151
#[napi]

0 commit comments

Comments
 (0)