@@ -50,6 +50,7 @@ struct OcrExtraction {
5050 prolactin : Option < OcrValue > ,
5151 shbg : Option < OcrValue > ,
5252 fai : Option < OcrValue > ,
53+ sample_date : Option < String > ,
5354}
5455
5556#[ derive( Clone , Debug , Deserialize ) ]
@@ -85,8 +86,13 @@ fn apply_ocr_extraction(
8586 shbg_level : RwSignal < String > ,
8687 shbg_unit : RwSignal < String > ,
8788 free_androgen_index : RwSignal < String > ,
89+ test_date_time : RwSignal < String > ,
8890) -> usize {
8991 let mut filled = 0 ;
92+ if let Some ( date) = extracted. sample_date {
93+ test_date_time. set ( date) ;
94+ filled += 1 ;
95+ }
9096 if let Some ( value) = extracted. estradiol {
9197 estradiol_level. set ( value. value ) ;
9298 if let Some ( unit) = value. unit {
@@ -259,6 +265,94 @@ fn extract_ocr_value(text: &str, labels: &[&str]) -> Option<OcrValue> {
259265 None
260266}
261267
/// Maps an English month name to its calendar number (1 = January … 12 = December).
///
/// Only the exact lowercase three-letter abbreviation or the exact lowercase full
/// name is recognised; callers are expected to lowercase the input first. Any
/// other spelling yields `None`.
fn month_from_name(name: &str) -> Option<u32> {
    // (abbreviation, full name) in calendar order; "may" is its own abbreviation.
    const MONTH_NAMES: [(&str, &str); 12] = [
        ("jan", "january"),
        ("feb", "february"),
        ("mar", "march"),
        ("apr", "april"),
        ("may", "may"),
        ("jun", "june"),
        ("jul", "july"),
        ("aug", "august"),
        ("sep", "september"),
        ("oct", "october"),
        ("nov", "november"),
        ("dec", "december"),
    ];

    MONTH_NAMES
        .iter()
        .zip(1u32..)
        .find(|((short, long), _)| name == *short || name == *long)
        .map(|(_, number)| number)
}
285+
286+ fn try_parse_date ( tokens : & [ & str ] ) -> Option < String > {
287+ if tokens. is_empty ( ) {
288+ return None ;
289+ }
290+ // Try DD/MM/YYYY or DD-MM-YYYY
291+ let first = tokens[ 0 ] ;
292+ for sep in [ '/' , '-' ] {
293+ let parts: Vec < & str > = first. split ( sep) . collect ( ) ;
294+ if parts. len ( ) == 3 {
295+ let a: u32 = parts[ 0 ] . parse ( ) . ok ( ) ?;
296+ let b: u32 = parts[ 1 ] . parse ( ) . ok ( ) ?;
297+ let c: u32 = parts[ 2 ] . parse ( ) . ok ( ) ?;
298+ let ( year, month, day) = if c > 100 {
299+ // DD/MM/YYYY
300+ ( c, b, a)
301+ } else if a > 100 {
302+ // YYYY/MM/DD
303+ ( a, b, c)
304+ } else {
305+ return None ;
306+ } ;
307+ if ( 1 ..=12 ) . contains ( & month) && ( 1 ..=31 ) . contains ( & day) {
308+ return Some ( format ! ( "{year:04}-{month:02}-{day:02}T12:00" ) ) ;
309+ }
310+ }
311+ }
312+ // Try DD MMM YYYY (tokens: ["15", "Mar", "2025"])
313+ if tokens. len ( ) >= 3 {
314+ if let Ok ( day) = tokens[ 0 ] . parse :: < u32 > ( ) {
315+ if let Some ( month) = month_from_name ( & tokens[ 1 ] . to_lowercase ( ) ) {
316+ if let Ok ( year) = tokens[ 2 ] . trim_end_matches ( |c : char | !c. is_ascii_digit ( ) ) . parse :: < u32 > ( ) {
317+ if ( 1 ..=31 ) . contains ( & day) && year > 1900 {
318+ return Some ( format ! ( "{year:04}-{month:02}-{day:02}T12:00" ) ) ;
319+ }
320+ }
321+ }
322+ }
323+ }
324+ None
325+ }
326+
327+ fn extract_sample_date ( text : & str ) -> Option < String > {
328+ let lower = text. to_lowercase ( ) ;
329+ let labels = [
330+ "sample collection date" ,
331+ "collection date" ,
332+ "date collected" ,
333+ "date of collection" ,
334+ "sample date" ,
335+ "specimen collection date" ,
336+ "specimen date" ,
337+ "collected on" ,
338+ "collected date" ,
339+ "date of sample" ,
340+ ] ;
341+ for label in labels {
342+ if let Some ( idx) = lower. find ( label) {
343+ let start = idx + label. len ( ) ;
344+ let end = ( start + 80 ) . min ( text. len ( ) ) ;
345+ let window = & text[ start..end] ;
346+ let trimmed = window. trim_start_matches ( [ ':' , ' ' , '\t' ] ) ;
347+ let tokens: Vec < & str > = trimmed. split_whitespace ( ) . collect ( ) ;
348+ if let Some ( date) = try_parse_date ( & tokens) {
349+ return Some ( date) ;
350+ }
351+ }
352+ }
353+ None
354+ }
355+
262356fn extract_ocr_values ( text : & str ) -> OcrExtraction {
263357 let cleaned = text. replace ( '\r' , "\n " ) ;
264358 OcrExtraction {
@@ -280,6 +374,7 @@ fn extract_ocr_values(text: &str) -> OcrExtraction {
280374 prolactin : extract_ocr_value ( & cleaned, & [ "prolactin" ] ) ,
281375 shbg : extract_ocr_value ( & cleaned, & [ "sex hormone binding globulin" , "shbg" ] ) ,
282376 fai : extract_ocr_value ( & cleaned, & [ "free androgen index" , "fai" ] ) ,
377+ sample_date : extract_sample_date ( & cleaned) ,
283378 }
284379}
285380
@@ -369,6 +464,7 @@ pub fn CreateBloodTest() -> impl IntoView {
369464 let shbg_level = shbg_level;
370465 let shbg_unit = shbg_unit;
371466 let free_androgen_index = free_androgen_index;
467+ let test_date_time = test_date_time;
372468 move |ev : leptos:: ev:: Event | {
373469 if ocr_busy. get ( ) {
374470 return ;
@@ -481,6 +577,7 @@ pub fn CreateBloodTest() -> impl IntoView {
481577 shbg_level,
482578 shbg_unit,
483579 free_androgen_index,
580+ test_date_time,
484581 ) ;
485582 if filled == 0 {
486583 ocr_error. set ( Some ( "OCR ran, but no lab values were found." . to_string ( ) ) ) ;
@@ -523,6 +620,7 @@ pub fn CreateBloodTest() -> impl IntoView {
523620 let shbg_level = shbg_level;
524621 let shbg_unit = shbg_unit;
525622 let free_androgen_index = free_androgen_index;
623+ let test_date_time = test_date_time;
526624 move |ev : leptos:: ev:: Event | {
527625 if pdf_busy. get ( ) {
528626 return ;
@@ -643,6 +741,7 @@ pub fn CreateBloodTest() -> impl IntoView {
643741 shbg_level,
644742 shbg_unit,
645743 free_androgen_index,
744+ test_date_time,
646745 ) ;
647746 }
648747 if item. extract_error . is_some ( ) {
0 commit comments