Skip to content

Commit 943539c

Browse files
Allow reading from files (#4)
* Add tests * Read markdown file * Fix tests * Read notebooks too * Include file path in error message * Make file reading async * Fix for windows? * Actually fix for windows Should have read the errors properly * Update src/lib.rs Co-authored-by: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com> --------- Co-authored-by: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com>
1 parent f226937 commit 943539c

File tree

10 files changed

+211
-2
lines changed

10 files changed

+211
-2
lines changed

Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@ crate-type = ["cdylib"]
1010

1111
[dependencies]
1212
markdown = "1.0.0"
13-
napi = "3.0.0"
13+
napi = { version = "3.0.0", features = ["async"] }
1414
napi-derive = "3.0.0"
15+
serde = { version = "1.0.228", features = ["derive"] }
16+
serde_json = "1.0.149"
17+
tokio = { version = "1.49.0", features = ["fs"] }
1518

1619
[build-dependencies]
1720
napi-build = "2"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"source": [
6+
"This is not valid JSON due to the following trailing comma:",
7+
]
8+
}
9+
]
10+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"sauce": [
6+
"# Example notebook\n",
7+
"\n",
8+
"With a [link](/path)!"
9+
]
10+
}
11+
]
12+
}

__test__/fixtures/markdown.ipynb

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "48bb7c88-5f83-4f6d-9fe8-aa20f188c4ff",
6+
"metadata": {},
7+
"source": [
8+
"# Example notebook\n",
9+
"\n",
10+
"With a [link](/path)!"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 1,
16+
"id": "bcaf0da4-226f-46f7-b575-8a68880acdfd",
17+
"metadata": {},
18+
"outputs": [
19+
{
20+
"data": {
21+
"text/plain": [
22+
"Some output"
23+
]
24+
},
25+
"execution_count": 1,
26+
"metadata": {},
27+
"output_type": "execute_result"
28+
}
29+
],
30+
"source": [
31+
"print(\"Some output\")"
32+
]
33+
},
34+
{
35+
"cell_type": "raw",
36+
"id": "e30adffd-fb91-4711-abfa-79ebe7eeee46",
37+
"metadata": {},
38+
"source": [
39+
"???"
40+
]
41+
},
42+
{
43+
"cell_type": "markdown",
44+
"id": "48bb7c88-5f83-4f6d-9fe8-aa20f188c4ff",
45+
"metadata": {},
46+
"source": [
47+
"And a second [link](/path2)"
48+
]
49+
}
50+
],
51+
"metadata": {
52+
"kernelspec": {
53+
"display_name": "Python 3",
54+
"language": "python",
55+
"name": "python3"
56+
},
57+
"language_info": {
58+
"codemirror_mode": {
59+
"name": "ipython",
60+
"version": 3
61+
},
62+
"file_extension": ".py",
63+
"mimetype": "text/x-python",
64+
"name": "python",
65+
"nbconvert_exporter": "python",
66+
"pygments_lexer": "ipython3",
67+
"version": "3"
68+
},
69+
"title": "Example notebook"
70+
},
71+
"nbformat": 4,
72+
"nbformat_minor": 4
73+
}

__test__/fixtures/markdown.mdx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Example document
2+
3+
Here's a simple page with a [link](/path)!

__test__/index.spec.ts

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import test from 'ava'
22

3-
import { extractLinks } from '../index'
3+
import { extractLinks, extractLinksFromFile } from '../index'
44

55
const dedent = (s: string) => s.replace('\n ', '')
66

@@ -108,3 +108,49 @@ test('extractLinks: appropriate jsx error message', (t) => {
108108
t.is(error.name, 'Error')
109109
t.is(error.message, '1:13: Expected a closing tag for `<Admonition>` (1:1) (markdown-rs:end-tag-mismatch)')
110110
})
111+
112+
// An MDX fixture containing one inline link should yield exactly that link.
test('extractLinksFromFile: mdx file', async (t) => {
  const fixturePath = '__test__/fixtures/markdown.mdx'
  const extracted = await extractLinksFromFile(fixturePath)

  t.deepEqual(extracted, ['/path'])
})
116+
117+
// Links are collected from the notebook fixture's markdown cells; the code
// and raw cells in that fixture contribute nothing.
test('extractLinksFromFile: notebook', async (t) => {
  const expected = ['/path', '/path2']
  const found = await extractLinksFromFile('__test__/fixtures/markdown.ipynb')

  // Sort both sides so the assertion does not depend on the order in which
  // the links are returned.
  t.deepEqual(found.sort(), expected.sort())
})
121+
122+
// Reading a path that does not exist must reject with an error that names
// the offending path.
test('extractLinksFromFile: markdown file not found', async (t) => {
  const missingPath = '__test__/fixtures/file_that_does_not_exist.md'
  const error = await t.throwsAsync(async () => extractLinksFromFile(missingPath))

  t.is(error.name, 'Error')

  // The OS-provided part of the message differs between platforms; either
  // wording is acceptable.
  const unixMessage =
    'Could not read "__test__/fixtures/file_that_does_not_exist.md": No such file or directory (os error 2)'
  const windowsMessage =
    'Could not read "__test__/fixtures/file_that_does_not_exist.md": The system cannot find the file specified. (os error 2)'
  t.assert([unixMessage, windowsMessage].includes(error.message))
})
135+
136+
// A notebook that is not syntactically valid JSON is rejected, and the
// JSON parser's message is surfaced together with the file path.
test('extractLinksFromFile: invalid notebook (not JSON)', async (t) => {
  const brokenNotebook = '__test__/fixtures/invalid-notebook-json.ipynb'
  const error = await t.throwsAsync(async () => extractLinksFromFile(brokenNotebook))

  t.is(error.name, 'Error')

  const expectedMessage =
    'Could not read "__test__/fixtures/invalid-notebook-json.ipynb": trailing comma at line 7 column 7'
  t.is(error.message, expectedMessage)
})
146+
147+
// A notebook that is valid JSON but lacks a required cell field (the fixture
// spells `source` as `sauce`) is rejected with a schema error.
test('extractLinksFromFile: invalid notebook (bad schema)', async (t) => {
  const malformedNotebook = '__test__/fixtures/invalid-notebook-schema.ipynb'
  const error = await t.throwsAsync(async () => extractLinksFromFile(malformedNotebook))

  t.is(error.name, 'Error')

  const expectedMessage =
    'Could not read "__test__/fixtures/invalid-notebook-schema.ipynb": missing field `source` at line 10 column 5'
  t.is(error.message, expectedMessage)
})

index.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@
55
* (gfm), math, and JSX.
66
*/
77
export declare function extractLinks(markdown: string): Array<string>
8+
9+
export declare function extractLinksFromFile(filePath: string): Promise<Array<string>>

index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,4 @@ if (!nativeBinding) {
573573

574574
module.exports = nativeBinding
575575
module.exports.extractLinks = nativeBinding.extractLinks
576+
module.exports.extractLinksFromFile = nativeBinding.extractLinksFromFile

src/lib.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,36 @@ use markdown::{mdast::Node, to_mdast, Constructs, ParseOptions};
55
use napi::Error;
66
use napi_derive::napi;
77
use std::collections::HashSet;
8+
use tokio::fs;
9+
10+
use crate::notebook::extract_markdown_from_notebook_source;
11+
12+
mod notebook;
13+
14+
fn file_read_error(path: String, reason: String) -> Result<Vec<String>, Error> {
15+
let message = format!("Could not read \"{path}\": {reason}");
16+
Err(Error::from_reason(message))
17+
}
18+
19+
#[napi]
20+
pub async fn extract_links_from_file(file_path: String) -> Result<Vec<String>, Error> {
21+
let is_notebook = file_path.ends_with(".ipynb");
22+
let source = match fs::read_to_string(&file_path).await {
23+
Ok(s) => s,
24+
Err(e) => return file_read_error(file_path, e.to_string()),
25+
};
26+
27+
let markdown = if is_notebook {
28+
match extract_markdown_from_notebook_source(source) {
29+
Ok(md) => md,
30+
Err(e) => return file_read_error(file_path, e.to_string()),
31+
}
32+
} else {
33+
source
34+
};
35+
36+
extract_links(markdown)
37+
}
838

939
/// Extract links from a markdown string. Supports GitHub-flavored markdown
1040
/// (gfm), math, and JSX.

src/notebook/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
use serde::Deserialize;
2+
3+
pub fn extract_markdown_from_notebook_source(source: String) -> Result<String, String> {
4+
let notebook: Notebook = match serde_json::from_str(&source) {
5+
Ok(s) => s,
6+
Err(e) => return Err(e.to_string()),
7+
};
8+
9+
let markdown: String = notebook
10+
.cells
11+
.into_iter()
12+
.filter(|cell| cell.cell_type == "markdown")
13+
.map(|cell| cell.source.join(""))
14+
.collect::<Vec<String>>()
15+
.join("\n\n");
16+
17+
Ok(markdown)
18+
}
19+
20+
#[derive(Deserialize)]
21+
struct Notebook {
22+
cells: Vec<NotebookCell>,
23+
}
24+
25+
#[derive(Deserialize)]
26+
struct NotebookCell {
27+
cell_type: String,
28+
source: Vec<String>,
29+
}

0 commit comments

Comments
 (0)