Skip to content

Commit c8165ba

Browse files
authored
Extract both links and anchors from file (#6)
* Extract both links and anchors from file
* Refactor: Move links to own module
1 parent b898dae commit c8165ba

File tree

5 files changed

+111
-92
lines changed

5 files changed

+111
-92
lines changed

__test__/index.spec.ts

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import test from 'ava'
22

3-
import { extractLinks, extractAnchors, extractLinksFromFile } from '../index'
3+
import { extractLinks, extractAnchors, extractFromFile } from '../index'
44

55
const dedent = (s: string) => s.replace('\n ', '')
66

@@ -109,20 +109,19 @@ test('extractLinks: appropriate jsx error message', (t) => {
109109
t.is(error.message, '1:13: Expected a closing tag for `<Admonition>` (1:1) (markdown-rs:end-tag-mismatch)')
110110
})
111111

112-
test('extractLinksFromFile: mdx file', async (t) => {
113-
const links = await extractLinksFromFile('__test__/fixtures/markdown.mdx')
114-
t.deepEqual(links, ['/path'])
112+
test('extractFromFile: mdx file', async (t) => {
113+
const result = await extractFromFile('__test__/fixtures/markdown.mdx')
114+
t.deepEqual(result, [['/path'], ['#example-document']])
115115
})
116116

117-
test('extractLinksFromFile: notebook', async (t) => {
118-
const links = (await extractLinksFromFile('__test__/fixtures/markdown.ipynb')).sort()
119-
t.deepEqual(links, ['/path', '/path2'].sort())
117+
test('extractFromFile: notebook', async (t) => {
118+
const [links, anchors] = await extractFromFile('__test__/fixtures/markdown.ipynb')
119+
t.deepEqual(links.sort(), ['/path', '/path2'])
120+
t.deepEqual(anchors, ['#example-notebook'])
120121
})
121122

122-
test('extractLinksFromFile: markdown file not found', async (t) => {
123-
const error = await t.throwsAsync(
124-
async () => await extractLinksFromFile('__test__/fixtures/file_that_does_not_exist.md'),
125-
)
123+
test('extractFromFile: markdown file not found', async (t) => {
124+
const error = await t.throwsAsync(async () => await extractFromFile('__test__/fixtures/file_that_does_not_exist.md'))
126125
t.is(error.name, 'Error')
127126

128127
// The error message changes depending on OS, but both are acceptable
@@ -133,20 +132,18 @@ test('extractLinksFromFile: markdown file not found', async (t) => {
133132
t.assert(acceptableMessages.includes(error.message))
134133
})
135134

136-
test('extractLinksFromFile: invalid notebook (not JSON)', async (t) => {
137-
const error = await t.throwsAsync(
138-
async () => await extractLinksFromFile('__test__/fixtures/invalid-notebook-json.ipynb'),
139-
)
135+
test('extractFromFile: invalid notebook (not JSON)', async (t) => {
136+
const error = await t.throwsAsync(async () => await extractFromFile('__test__/fixtures/invalid-notebook-json.ipynb'))
140137
t.is(error.name, 'Error')
141138
t.is(
142139
error.message,
143140
'Could not read "__test__/fixtures/invalid-notebook-json.ipynb": trailing comma at line 7 column 7',
144141
)
145142
})
146143

147-
test('extractLinksFromFile: invalid notebook (bad schema)', async (t) => {
144+
test('extractFromFile: invalid notebook (bad schema)', async (t) => {
148145
const error = await t.throwsAsync(
149-
async () => await extractLinksFromFile('__test__/fixtures/invalid-notebook-schema.ipynb'),
146+
async () => await extractFromFile('__test__/fixtures/invalid-notebook-schema.ipynb'),
150147
)
151148
t.is(error.name, 'Error')
152149
t.is(

index.d.ts

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,18 @@
77
*/
88
export declare function extractAnchors(markdown: string): Array<string>
99

10+
/**
11+
* Extracts links and anchors from an MDX file or notebook containing MDX.
12+
*
13+
* Example:
14+
* ```ts
15+
* const [links, anchors] = await extractFromFile("notebook.ipynb");
16+
* ```
17+
*/
18+
export declare function extractFromFile(filePath: string): Promise<[string[], string[]]>
19+
1020
/**
1121
* Extract links from a markdown string. Supports GitHub-flavored markdown
1222
* (gfm), math, and JSX.
1323
*/
1424
export declare function extractLinks(markdown: string): Array<string>
15-
16-
export declare function extractLinksFromFile(filePath: string): Promise<Array<string>>

index.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -573,5 +573,5 @@ if (!nativeBinding) {
573573

574574
module.exports = nativeBinding
575575
module.exports.extractAnchors = nativeBinding.extractAnchors
576+
module.exports.extractFromFile = nativeBinding.extractFromFile
576577
module.exports.extractLinks = nativeBinding.extractLinks
577-
module.exports.extractLinksFromFile = nativeBinding.extractLinksFromFile

src/lib.rs

Lines changed: 21 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,49 @@
1-
use markdown::mdast::{AttributeContent, AttributeValue, MdxJsxTextElement};
2-
use markdown::{mdast::Node, to_mdast, Constructs, ParseOptions};
31
use napi::Error;
42
use napi_derive::napi;
5-
use std::collections::HashSet;
63
use tokio::fs;
74

5+
use crate::anchors::extract_anchors_from_ref;
86
use crate::notebook::extract_markdown_from_notebook_source;
97

8+
mod anchors;
9+
mod links;
1010
mod notebook;
1111

12-
fn file_read_error(path: String, reason: String) -> Result<Vec<String>, Error> {
12+
fn file_read_error(path: String, reason: String) -> Error {
1313
let message = format!("Could not read \"{path}\": {reason}");
14-
Err(Error::from_reason(message))
14+
Error::from_reason(message)
1515
}
1616

17-
#[napi]
18-
pub async fn extract_links_from_file(file_path: String) -> Result<Vec<String>, Error> {
17+
/// Extracts links and anchors from an MDX file or notebook containing MDX.
18+
///
19+
/// Example:
20+
/// ```ts
21+
/// const [links, anchors] = await extractFromFile("notebook.ipynb");
22+
/// ```
23+
#[napi(ts_return_type = "Promise<[string[], string[]]>")]
24+
pub async fn extract_from_file(file_path: String) -> Result<Vec<Vec<String>>, Error> {
1925
let is_notebook = file_path.ends_with(".ipynb");
2026
let source = match fs::read_to_string(&file_path).await {
2127
Ok(s) => s,
22-
Err(e) => return file_read_error(file_path, e.to_string()),
28+
Err(e) => return Err(file_read_error(file_path, e.to_string())),
2329
};
2430

2531
let markdown = if is_notebook {
2632
match extract_markdown_from_notebook_source(source) {
2733
Ok(md) => md,
28-
Err(e) => return file_read_error(file_path, e.to_string()),
34+
Err(e) => return Err(file_read_error(file_path, e.to_string())),
2935
}
3036
} else {
3137
source
3238
};
3339

34-
extract_links(markdown)
40+
let anchors = extract_anchors_from_ref(&markdown);
41+
match extract_links(markdown) {
42+
Ok(links) => Ok(vec![links, anchors]),
43+
Err(e) => Err(Error::from_reason(e.to_string())),
44+
}
3545
}
3646

37-
use crate::anchors::extract_anchors_from_ref;
38-
39-
mod anchors;
40-
4147
/// Extract anchors from a markdown string. Anchors are either:
4248
/// * slugified headings, deduplicated if the same heading appears more than once
4349
/// * `id` props of HTML tags. These are not deduplicated as they should be unique per file
@@ -50,62 +56,5 @@ pub fn extract_anchors(markdown: String) -> Vec<String> {
5056
/// (gfm), math, and JSX.
5157
#[napi]
5258
pub fn extract_links(markdown: String) -> Result<Vec<String>, Error> {
53-
let options = ParseOptions {
54-
constructs: Constructs {
55-
gfm_autolink_literal: true,
56-
gfm_footnote_definition: true,
57-
gfm_label_start_footnote: true,
58-
gfm_strikethrough: true,
59-
gfm_table: true,
60-
gfm_task_list_item: true,
61-
math_flow: true,
62-
math_text: true,
63-
mdx_jsx_flow: true,
64-
mdx_jsx_text: true,
65-
..Constructs::mdx()
66-
},
67-
..ParseOptions::mdx()
68-
};
69-
70-
let ast = match to_mdast(markdown.as_str(), &options) {
71-
Ok(ast) => ast,
72-
Err(m) => return Err(Error::from_reason(m.to_string())),
73-
};
74-
75-
let mut links = HashSet::<&String>::default();
76-
extract_from_node(&ast, &mut links);
77-
78-
Ok(links.into_iter().cloned().collect())
79-
}
80-
81-
fn extract_from_node<'a>(node: &'a Node, links: &mut HashSet<&'a String>) {
82-
let maybe_link = match node {
83-
Node::Image(img) => Some(&img.url),
84-
Node::Link(link) => Some(&link.url),
85-
Node::MdxJsxTextElement(el) => extract_from_jsx_text_element(el),
86-
_ => None,
87-
};
88-
89-
if let Some(link) = maybe_link {
90-
links.insert(link);
91-
}
92-
93-
if let Some(children) = node.children() {
94-
for child in children {
95-
extract_from_node(child, links);
96-
}
97-
}
98-
}
99-
100-
fn extract_from_jsx_text_element(el: &MdxJsxTextElement) -> Option<&String> {
101-
let Some(Some(href_attr)) = el.attributes.iter().find_map(|attr| match attr {
102-
AttributeContent::Property(p) if p.name == "href" => Some(&p.value),
103-
_ => None,
104-
}) else {
105-
return None;
106-
};
107-
match href_attr {
108-
AttributeValue::Literal(s) => Some(s),
109-
_ => None,
110-
}
59+
links::extract_links(markdown)
11160
}

src/links/mod.rs

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,65 @@
1+
use markdown::mdast::{AttributeContent, AttributeValue, MdxJsxTextElement};
2+
use markdown::{mdast::Node, to_mdast, Constructs, ParseOptions};
3+
use napi::Error;
4+
use std::collections::HashSet;
5+
6+
pub fn extract_links(markdown: String) -> Result<Vec<String>, Error> {
7+
let options = ParseOptions {
8+
constructs: Constructs {
9+
gfm_autolink_literal: true,
10+
gfm_footnote_definition: true,
11+
gfm_label_start_footnote: true,
12+
gfm_strikethrough: true,
13+
gfm_table: true,
14+
gfm_task_list_item: true,
15+
math_flow: true,
16+
math_text: true,
17+
mdx_jsx_flow: true,
18+
mdx_jsx_text: true,
19+
..Constructs::mdx()
20+
},
21+
..ParseOptions::mdx()
22+
};
23+
24+
let ast = match to_mdast(markdown.as_str(), &options) {
25+
Ok(ast) => ast,
26+
Err(m) => return Err(Error::from_reason(m.to_string())),
27+
};
28+
29+
let mut links = HashSet::<&String>::default();
30+
extract_from_node(&ast, &mut links);
31+
32+
Ok(links.into_iter().cloned().collect())
33+
}
34+
35+
fn extract_from_node<'a>(node: &'a Node, links: &mut HashSet<&'a String>) {
36+
let maybe_link = match node {
37+
Node::Image(img) => Some(&img.url),
38+
Node::Link(link) => Some(&link.url),
39+
Node::MdxJsxTextElement(el) => extract_from_jsx_text_element(el),
40+
_ => None,
41+
};
42+
43+
if let Some(link) = maybe_link {
44+
links.insert(link);
45+
}
46+
47+
if let Some(children) = node.children() {
48+
for child in children {
49+
extract_from_node(child, links);
50+
}
51+
}
52+
}
53+
54+
fn extract_from_jsx_text_element(el: &MdxJsxTextElement) -> Option<&String> {
55+
let Some(Some(href_attr)) = el.attributes.iter().find_map(|attr| match attr {
56+
AttributeContent::Property(p) if p.name == "href" => Some(&p.value),
57+
_ => None,
58+
}) else {
59+
return None;
60+
};
61+
match href_attr {
62+
AttributeValue::Literal(s) => Some(s),
63+
_ => None,
64+
}
65+
}

0 commit comments

Comments
 (0)