1- use std:: convert:: TryInto ;
2- use std:: ffi:: CString ;
3- use std:: io:: Read ;
4- use std:: marker:: PhantomData ;
5- use std:: ptr;
1+ use std:: {
2+ convert:: TryInto ,
3+ ffi:: { c_int, c_void, CString } ,
4+ io:: Read ,
5+ marker:: PhantomData ,
6+ ptr:: { self , NonNull } ,
7+ slice,
8+ } ;
69
710use bitflags:: bitflags;
811use mupdf_sys:: * ;
912use num_enum:: TryFromPrimitive ;
1013
14+ use crate :: FFIAnalogue ;
1115use crate :: {
12- array:: FzArray , context, rust_slice_to_ffi_ptr, rust_vec_from_ffi_ptr, Buffer , Error , Image ,
13- Matrix , Point , Quad , Rect , WriteMode ,
16+ context, rust_slice_to_ffi_ptr, Buffer , Error , Image , Matrix , Point , Quad , Rect , WriteMode ,
1417} ;
1518
1619bitflags ! {
@@ -51,20 +54,111 @@ impl TextPage {
5154 }
5255 }
5356
54- pub fn search ( & self , needle : & str , hit_max : u32 ) -> Result < FzArray < Quad > , Error > {
57+ pub fn search ( & self , needle : & str ) -> Result < Vec < Quad > , Error > {
58+ let mut vec = Vec :: new ( ) ;
59+ self . search_cb ( needle, & mut vec, |v, quads| {
60+ v. extend ( quads. iter ( ) . cloned ( ) ) ;
61+ SearchHitResponse :: ContinueSearch
62+ } ) ?;
63+ Ok ( vec)
64+ }
65+
66+ /// Search through the page, finding all instances of `needle` and processing them through
67+ /// `cb`.
68+ /// Note that the `&[Quad]` given to `cb` in its invocation lives only during the time that
69+ /// `cb` is being evaluated. That means the following won't work or compile:
70+ ///
71+ /// ```compile_fail
72+ /// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
73+ /// # let text_page: TextPage = todo!();
74+ /// let mut quads: Vec<&Quad> = Vec::new();
75+ /// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
76+ /// v.extend(quads);
77+ /// SearchHitResponse::ContinueSearch
78+ /// }).unwrap();
79+ /// ```
80+ ///
81+ /// But the following will:
82+ /// ```no_run
83+ /// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
84+ /// # let text_page: TextPage = todo!();
85+ /// let mut quads: Vec<Quad> = Vec::new();
86+ /// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
87+ /// v.extend(quads.iter().cloned());
88+ /// SearchHitResponse::ContinueSearch
89+ /// }).unwrap();
90+ /// ```
91+ pub fn search_cb < T , F > ( & self , needle : & str , data : & mut T , cb : F ) -> Result < u32 , Error >
92+ where
93+ T : ?Sized ,
94+ F : Fn ( & mut T , & [ Quad ] ) -> SearchHitResponse ,
95+ {
96+ // This struct allows us to wrap both the callback that the user gave us and the data so
97+ // that we can pass it into the ffi callback nicely
98+ struct FnWithData < ' parent , T : ?Sized , F >
99+ where
100+ F : Fn ( & mut T , & [ Quad ] ) -> SearchHitResponse ,
101+ {
102+ data : & ' parent mut T ,
103+ f : F ,
104+ }
105+
106+ let mut opaque = FnWithData { data, f : cb } ;
107+
108+ // And then here's the `fn` that we'll pass in - it has to be an fn, not capturing context,
109+ // because it needs to be unsafe extern "C". to be used with FFI.
110+ unsafe extern "C" fn ffi_cb < T , F > (
111+ _ctx : * mut fz_context ,
112+ data : * mut c_void ,
113+ num_quads : c_int ,
114+ hit_bbox : * mut fz_quad ,
115+ ) -> c_int
116+ where
117+ T : ?Sized ,
118+ F : Fn ( & mut T , & [ Quad ] ) -> SearchHitResponse ,
119+ Quad : FFIAnalogue < FFIType = fz_quad > ,
120+ {
121+ // This is upheld by our `FFIAnalogue` bound above
122+ let quad_ptr = hit_bbox. cast :: < Quad > ( ) ;
123+ let Some ( nn) = NonNull :: new ( quad_ptr) else {
124+ return SearchHitResponse :: ContinueSearch as c_int ;
125+ } ;
126+
127+ // This guarantee is upheld by mupdf - they're giving us a pointer to the same type we
128+ // gave them.
129+ let data = data. cast :: < FnWithData < ' _ , T , F > > ( ) ;
130+
131+ // But if they like gave us a -1 for number of results or whatever, give up on
132+ // decoding.
133+ let Ok ( len) = usize:: try_from ( num_quads) else {
134+ return SearchHitResponse :: ContinueSearch as c_int ;
135+ } ;
136+
137+ // SAFETY: We've ensure nn is not null, and we're trusting the FFI layer for the other
138+ // invariants (about actually holding the data, etc)
139+ let slice = unsafe { slice:: from_raw_parts_mut ( nn. as_ptr ( ) , len) } ;
140+
141+ // Get the function and the data
142+ // SAFETY: Trusting that the FFI layer actually gave us this ptr
143+ let f = unsafe { & ( * data) . f } ;
144+ // SAFETY: Trusting that the FFI layer actually gave us this ptr
145+ let data = unsafe { & mut ( * data) . data } ;
146+
147+ // And call the function with the data
148+ f ( data, slice) as c_int
149+ }
150+
55151 let c_needle = CString :: new ( needle) ?;
56- let hit_max = if hit_max < 1 { 16 } else { hit_max } ;
57- let mut hit_count = 0 ;
58152 unsafe {
59- ffi_try ! ( mupdf_search_stext_page (
153+ ffi_try ! ( mupdf_search_stext_page_cb (
60154 context( ) ,
61155 self . inner,
62156 c_needle. as_ptr( ) ,
63- hit_max as _ ,
64- & mut hit_count
157+ Some ( ffi_cb :: < T , F > ) ,
158+ & raw mut opaque as * mut c_void
65159 ) )
66160 }
67- . and_then ( |quads| unsafe { rust_vec_from_ffi_ptr ( quads , hit_count ) } )
161+ . map ( |count| count as u32 )
68162 }
69163
70164 pub fn highlight_selection (
@@ -98,6 +192,12 @@ impl Drop for TextPage {
98192 }
99193}
100194
/// Value returned by a search callback to tell the search whether to keep going.
///
/// The discriminants are the integers handed back across the FFI boundary when the variant
/// is cast to a C `int`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(i32)]
pub enum SearchHitResponse {
    /// Keep searching for further hits.
    ContinueSearch = 0,
    /// Stop the search after the current hit.
    AbortSearch = 1,
}
200+
101201#[ derive( Debug , Clone , Copy , PartialEq , TryFromPrimitive ) ]
102202#[ repr( u32 ) ]
103203pub enum TextBlockType {
@@ -262,7 +362,7 @@ impl<'a> Iterator for TextCharIter<'a> {
262362
263363#[ cfg( test) ]
264364mod test {
265- use crate :: { Document , TextPageOptions } ;
365+ use crate :: { text_page :: SearchHitResponse , Document , TextPageOptions } ;
266366
267367 #[ test]
268368 fn test_text_page_search ( ) {
@@ -271,31 +371,52 @@ mod test {
271371 let doc = Document :: open ( "tests/files/dummy.pdf" ) . unwrap ( ) ;
272372 let page0 = doc. load_page ( 0 ) . unwrap ( ) ;
273373 let text_page = page0. to_text_page ( TextPageOptions :: BLOCK_IMAGE ) . unwrap ( ) ;
274- let hits = text_page. search ( "Dummy" , 1 ) . unwrap ( ) ;
374+ let hits = text_page. search ( "Dummy" ) . unwrap ( ) ;
275375 assert_eq ! ( hits. len( ) , 1 ) ;
276376 assert_eq ! (
277377 & * hits,
278378 [ Quad {
279379 ul: Point {
280380 x: 56.8 ,
281- y: 69.32512
381+ y: 69.32953
282382 } ,
283383 ur: Point {
284- x: 115.85405 ,
285- y: 69.32512
384+ x: 115.85159 ,
385+ y: 69.32953
286386 } ,
287387 ll: Point {
288388 x: 56.8 ,
289- y: 87.311844
389+ y: 87.29713
290390 } ,
291391 lr: Point {
292- x: 115.85405 ,
293- y: 87.311844
392+ x: 115.85159 ,
393+ y: 87.29713
294394 }
295395 } ]
296396 ) ;
297397
298- let hits = text_page. search ( "Not Found" , 1 ) . unwrap ( ) ;
398+ let hits = text_page. search ( "Not Found" ) . unwrap ( ) ;
399+ assert_eq ! ( hits. len( ) , 0 ) ;
400+ }
401+
/// Drives `search_cb` with an accumulator: the x coordinates of all four corners of every
/// reported quad are summed, and both the returned count and the sum are checked.
#[test]
fn test_text_page_cb_search() {
    let document = Document::open("tests/files/dummy.pdf").unwrap();
    let first_page = document.load_page(0).unwrap();
    let text_page = first_page
        .to_text_page(TextPageOptions::BLOCK_IMAGE)
        .unwrap();

    let mut sum_x = 0.0;
    let num_hits = text_page
        .search_cb("Dummy", &mut sum_x, |total, quads| {
            // Corner order is kept as ul, ur, ll, lr so the float sum matches exactly.
            quads
                .iter()
                .for_each(|quad| *total += quad.ul.x + quad.ur.x + quad.ll.x + quad.lr.x);
            SearchHitResponse::ContinueSearch
        })
        .unwrap();
    assert_eq!(num_hits, 1);
    assert_eq!(sum_x, 56.8 + 115.85159 + 56.8 + 115.85159);

    // A needle that is absent from the page yields no quads.
    let missing = text_page.search("Not Found").unwrap();
    assert_eq!(missing.len(), 0);
}
301422}
0 commit comments