1- use markdown:: mdast:: { AttributeContent , AttributeValue , MdxJsxTextElement } ;
2- use markdown:: { mdast:: Node , to_mdast, Constructs , ParseOptions } ;
31use napi:: Error ;
42use napi_derive:: napi;
5- use std:: collections:: HashSet ;
63use tokio:: fs;
74
5+ use crate :: anchors:: extract_anchors_from_ref;
86use crate :: notebook:: extract_markdown_from_notebook_source;
97
8+ mod anchors;
9+ mod links;
1010mod notebook;
1111
12- fn file_read_error ( path : String , reason : String ) -> Result < Vec < String > , Error > {
12+ fn file_read_error ( path : String , reason : String ) -> Error {
1313 let message = format ! ( "Could not read \" {path}\" : {reason}" ) ;
14- Err ( Error :: from_reason ( message) )
14+ Error :: from_reason ( message)
1515}
1616
17- #[ napi]
18- pub async fn extract_links_from_file ( file_path : String ) -> Result < Vec < String > , Error > {
17+ /// Extracts links and anchors from an MDX file or notebook containing MDX.
18+ ///
19+ /// Example:
20+ /// ```ts
21+ /// const [links, anchors] = await extractFromFile("notebook.ipynb");
22+ /// ```
23+ #[ napi( ts_return_type = "Promise<[string[], string[]]>" ) ]
24+ pub async fn extract_from_file ( file_path : String ) -> Result < Vec < Vec < String > > , Error > {
1925 let is_notebook = file_path. ends_with ( ".ipynb" ) ;
2026 let source = match fs:: read_to_string ( & file_path) . await {
2127 Ok ( s) => s,
22- Err ( e) => return file_read_error ( file_path, e. to_string ( ) ) ,
28+ Err ( e) => return Err ( file_read_error ( file_path, e. to_string ( ) ) ) ,
2329 } ;
2430
2531 let markdown = if is_notebook {
2632 match extract_markdown_from_notebook_source ( source) {
2733 Ok ( md) => md,
28- Err ( e) => return file_read_error ( file_path, e. to_string ( ) ) ,
34+ Err ( e) => return Err ( file_read_error ( file_path, e. to_string ( ) ) ) ,
2935 }
3036 } else {
3137 source
3238 } ;
3339
34- extract_links ( markdown)
40+ let anchors = extract_anchors_from_ref ( & markdown) ;
41+ match extract_links ( markdown) {
42+ Ok ( links) => Ok ( vec ! [ links, anchors] ) ,
43+ Err ( e) => Err ( Error :: from_reason ( e. to_string ( ) ) ) ,
44+ }
3545}
3646
37- use crate :: anchors:: extract_anchors_from_ref;
38-
39- mod anchors;
40-
4147/// Extract anchors from a markdown string. Anchors are either:
4248/// * slugified headings, deduplicated if the same heading appears more than once
4349/// * `id` props of HTML tags. These are not deduplicated as they should be unique per file
@@ -50,62 +56,5 @@ pub fn extract_anchors(markdown: String) -> Vec<String> {
5056/// (gfm), math, and JSX.
5157#[ napi]
5258pub fn extract_links ( markdown : String ) -> Result < Vec < String > , Error > {
53- let options = ParseOptions {
54- constructs : Constructs {
55- gfm_autolink_literal : true ,
56- gfm_footnote_definition : true ,
57- gfm_label_start_footnote : true ,
58- gfm_strikethrough : true ,
59- gfm_table : true ,
60- gfm_task_list_item : true ,
61- math_flow : true ,
62- math_text : true ,
63- mdx_jsx_flow : true ,
64- mdx_jsx_text : true ,
65- ..Constructs :: mdx ( )
66- } ,
67- ..ParseOptions :: mdx ( )
68- } ;
69-
70- let ast = match to_mdast ( markdown. as_str ( ) , & options) {
71- Ok ( ast) => ast,
72- Err ( m) => return Err ( Error :: from_reason ( m. to_string ( ) ) ) ,
73- } ;
74-
75- let mut links = HashSet :: < & String > :: default ( ) ;
76- extract_from_node ( & ast, & mut links) ;
77-
78- Ok ( links. into_iter ( ) . cloned ( ) . collect ( ) )
79- }
80-
81- fn extract_from_node < ' a > ( node : & ' a Node , links : & mut HashSet < & ' a String > ) {
82- let maybe_link = match node {
83- Node :: Image ( img) => Some ( & img. url ) ,
84- Node :: Link ( link) => Some ( & link. url ) ,
85- Node :: MdxJsxTextElement ( el) => extract_from_jsx_text_element ( el) ,
86- _ => None ,
87- } ;
88-
89- if let Some ( link) = maybe_link {
90- links. insert ( link) ;
91- }
92-
93- if let Some ( children) = node. children ( ) {
94- for child in children {
95- extract_from_node ( child, links) ;
96- }
97- }
98- }
99-
100- fn extract_from_jsx_text_element ( el : & MdxJsxTextElement ) -> Option < & String > {
101- let Some ( Some ( href_attr) ) = el. attributes . iter ( ) . find_map ( |attr| match attr {
102- AttributeContent :: Property ( p) if p. name == "href" => Some ( & p. value ) ,
103- _ => None ,
104- } ) else {
105- return None ;
106- } ;
107- match href_attr {
108- AttributeValue :: Literal ( s) => Some ( s) ,
109- _ => None ,
110- }
59+ links:: extract_links ( markdown)
11160}
0 commit comments