@@ -95,7 +95,7 @@ type FToolIntegrationTests() =
9595 <w:p><w:r><w:t>Hello Word DOCX</w:t></w:r></w:p>
9696 <w:p><w:r><w:t>Count me 2 times</w:t></w:r></w:p>
9797 </w:body>
98- </w:document>
98+ </w:document>
9999"""
100100
101101 let docxPath = Path.Combine( tmp.FullName, " sample.docx" )
@@ -178,6 +178,189 @@ type FToolUnitTests() =
178178 with _ ->
179179 ()
180180
181+ [<Test>]
182+ member _. ``tryReadPdf default ( pdftotext missing ) and custom exe success`` () =
183+ let tmp = Path.Combine( Path.GetTempPath(), " f_pdf_custom_" + Guid.NewGuid() .ToString( " N" ))
184+ Directory.CreateDirectory( tmp) |> ignore
185+
186+ try
187+ let pdfPath = Path.Combine( tmp, " doc.pdf" )
188+ let content = " Hello from custom exe"
189+ File.WriteAllText( pdfPath, content, UTF8Encoding( false ))
190+
191+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , null )
192+ let noneDefault = Program.tryReadPdf pdfPath
193+ Assert.That( noneDefault, Is.EqualTo( None))
194+
195+ let toolPath = Path.Combine( tmp, " echo_pdf.sh" )
196+ let script =
197+ """ #!/usr/bin/env bash
198+ set -euo pipefail
199+ # Find first arg that is an existing file and cat it
200+ for a in "$@"; do
201+ if [ -f "$a" ]; then
202+ cat "$a"
203+ exit 0
204+ fi
205+ done
206+ exit 1
207+ """
208+ File.WriteAllText( toolPath, script, UTF8Encoding( false ))
209+
210+ if not ( OperatingSystem.IsWindows()) then
211+ try
212+ File.SetUnixFileMode( toolPath, UnixFileMode.UserRead |||
213+ UnixFileMode.UserExecute |||
214+ UnixFileMode.UserWrite |||
215+ UnixFileMode.GroupRead |||
216+ UnixFileMode.GroupExecute |||
217+ UnixFileMode.OtherRead |||
218+ UnixFileMode.OtherExecute)
219+ with _ ->
220+ let psi = ProcessStartInfo( " /bin/chmod" , $" +x \" {toolPath}\" " )
221+ psi.RedirectStandardError <- true
222+ psi.RedirectStandardOutput <- true
223+ use p = Process.Start( psi)
224+ p.WaitForExit()
225+
226+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , toolPath)
227+ let ok = Program.tryReadPdf pdfPath
228+ Assert.That( ok, Is.EqualTo( Some content))
229+ finally
230+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , null )
231+ try Directory.Delete( tmp, true ) with _ -> ()
232+
233+ [<Test>]
234+ member _. ``tryReadPdf default success via PATH stub ( non - Windows ) ``() =
235+ if OperatingSystem.IsWindows() then
236+ Assert.Pass( " Skipped on Windows" )
237+ else
238+ let tmp = Path.Combine( Path.GetTempPath(), " f_pdf_path_" + Guid.NewGuid() .ToString( " N" ))
239+ Directory.CreateDirectory( tmp) |> ignore
240+ let oldPath = Environment.GetEnvironmentVariable( " PATH" )
241+ try
242+ let pdfPath = Path.Combine( tmp, " doc.pdf" )
243+ let content = " Hello from PATH pdftotext"
244+ File.WriteAllText( pdfPath, content, UTF8Encoding( false ))
245+
246+ let toolDir = Directory.CreateDirectory( Path.Combine( tmp, " bin" )) .FullName
247+ let toolPath = Path.Combine( toolDir, " pdftotext" )
248+ let script =
249+ """ #!/usr/bin/env bash
250+ set -euo pipefail
251+ for a in "$@"; do
252+ if [ -f "$a" ]; then
253+ cat "$a"
254+ exit 0
255+ fi
256+ done
257+ exit 1
258+ """
259+ File.WriteAllText( toolPath, script, UTF8Encoding( false ))
260+ File.SetUnixFileMode( toolPath,
261+ UnixFileMode.UserRead ||| UnixFileMode.UserExecute |||
262+ UnixFileMode.UserWrite ||| UnixFileMode.GroupRead |||
263+ UnixFileMode.GroupExecute ||| UnixFileMode.OtherRead |||
264+ UnixFileMode.OtherExecute)
265+
266+ let newPath = ( toolDir + " :" + ( if String.IsNullOrEmpty( oldPath) then " " else oldPath))
267+ Environment.SetEnvironmentVariable( " PATH" , newPath)
268+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , null )
269+
270+ let res = Program.tryReadPdf pdfPath
271+ Assert.That( res, Is.EqualTo( Some content))
272+ finally
273+ Environment.SetEnvironmentVariable( " PATH" , oldPath)
274+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , null )
275+ try Directory.Delete( tmp, true ) with _ -> ()
276+
277+ [<Test>]
278+ member _. ``tryReadPdf custom exe failure returns None`` () =
279+ let tmp = Path.Combine( Path.GetTempPath(), " f_pdf_custom_fail_" + Guid.NewGuid() .ToString( " N" ))
280+ Directory.CreateDirectory( tmp) |> ignore
281+ try
282+ let pdfPath = Path.Combine( tmp, " doc.pdf" )
283+ File.WriteAllText( pdfPath, " content" , UTF8Encoding( false ))
284+
285+ let toolPath = Path.Combine( tmp, " fail_pdf.sh" )
286+ let script =
287+ """ #!/usr/bin/env bash
288+ exit 3
289+ """
290+ File.WriteAllText( toolPath, script, UTF8Encoding( false ))
291+ if not ( OperatingSystem.IsWindows()) then
292+ try
293+ File.SetUnixFileMode( toolPath, UnixFileMode.UserRead |||
294+ UnixFileMode.UserExecute)
295+ with _ ->
296+ let psi = ProcessStartInfo( " /bin/chmod" , $" +x \" {toolPath}\" " )
297+ psi.RedirectStandardError <- true
298+ psi.RedirectStandardOutput <- true
299+ use p = Process.Start( psi)
300+ p.WaitForExit()
301+
302+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , toolPath)
303+ let res = Program.tryReadPdf pdfPath
304+ Assert.That( res, Is.EqualTo( None))
305+ finally
306+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , null )
307+ try Directory.Delete( tmp, true ) with _ -> ()
308+
309+ [<Test>]
310+ member _. ``main handles UnauthorizedAccessException`` () =
311+ if OperatingSystem.IsWindows() then
312+ Assert.Pass( " Skipped on Windows" )
313+ else
314+ let tmp = Path.Combine( Path.GetTempPath(), " f_unauth_" + Guid.NewGuid() .ToString( " N" ))
315+ Directory.CreateDirectory( tmp) |> ignore
316+ try
317+ let path = Path.Combine( tmp, " secret.txt" )
318+ File.WriteAllText( path, " secret" , UTF8Encoding( false ))
319+
320+ File.SetUnixFileMode( path, enum 0 )
321+
322+ let repoRoot =
323+ let rec findUp ( startDir : string ) ( marker : string ) =
324+ let full = Path.GetFullPath( startDir)
325+ let candidate = Path.Combine( full, marker)
326+ if File.Exists( candidate) then full
327+ else
328+ let parent = Directory.GetParent( full)
329+ if isNull parent then failwithf $" Could not locate '%s {marker}' above '%s {startDir}'"
330+ else findUp parent.FullName marker
331+ findUp AppContext.BaseDirectory " ancpdevkit.sln"
332+
333+ let proj = Path.Combine( repoRoot, " tools" , " ancp" , " f" , " f.fsproj" )
334+ let psi = ProcessStartInfo( " dotnet" , $" run --no-build --project {proj} --framework net9.0 -- {path}" )
335+ psi.WorkingDirectory <- repoRoot
336+ psi.RedirectStandardOutput <- true
337+ psi.RedirectStandardError <- true
338+ use p = Process.Start( psi)
339+ let _outStr = p.StandardOutput.ReadToEnd()
340+ let errStr = p.StandardError.ReadToEnd()
341+ p.WaitForExit()
342+
343+ Assert.That( p.ExitCode, Is.EqualTo( 1 ))
344+ Assert.That( errStr.Contains( " Error:" ), Is.True)
345+ finally
346+ try File.SetUnixFileMode( Path.Combine( tmp, " secret.txt" ),
347+ UnixFileMode.UserRead ||| UnixFileMode.UserWrite) with _ -> ()
348+ try Directory.Delete( tmp, true ) with _ -> ()
349+
350+ [<Test>]
351+ member _. ``readInput pdf FAIL propagates to error`` () =
352+ let tmp = Path.Combine( Path.GetTempPath(), " f_pdf_fail_" + Guid.NewGuid() .ToString( " N" ))
353+ Directory.CreateDirectory( tmp) |> ignore
354+
355+ try
356+ let pdfPath = Path.Combine( tmp, " doc.pdf" )
357+ File.WriteAllText( pdfPath, " dummy" , UTF8Encoding( false ))
358+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , " FAIL" )
359+ Assert.Throws< Exception>( fun () -> Program.readInput [| pdfPath |] |> ignore) |> ignore
360+ finally
361+ Environment.SetEnvironmentVariable( " ANCP_PDFTOTEXT" , null )
362+ try Directory.Delete( tmp, true ) with _ -> ()
363+
181364 [<Test>]
182365 member _. ``readInput handles pdf and invalid args`` () =
183366 let tmp = Path.Combine( Path.GetTempPath(), " f_inp_" + Guid.NewGuid() .ToString( " N" ))
@@ -400,3 +583,42 @@ type FToolUnitTests() =
400583 Assert.That( output2.Trim(), Is.EqualTo( " 3" ))
401584 finally
402585 try Directory.Delete( tmp, true ) with _ -> ()
586+
587+ [<Test>]
588+ member _. ``main handles malformed docx ( generic catch ) ``() =
589+ let tmp = Path.Combine( Path.GetTempPath(), " f_docx_bad_" + Guid.NewGuid() .ToString( " N" ))
590+ Directory.CreateDirectory( tmp) |> ignore
591+ try
592+ let docxPath = Path.Combine( tmp, " bad.docx" )
593+ do
594+ use fs = File.Create( docxPath)
595+ use za = new System.IO.Compression.ZipArchive( fs, System.IO.Compression.ZipArchiveMode.Create)
596+ let entry = za.CreateEntry( " word/document.xml" )
597+ use es = entry.Open()
598+ let bytes = Encoding.UTF8.GetBytes( " <w:document>oops" )
599+ es.Write( bytes, 0 , bytes.Length)
600+
601+ let repoRoot =
602+ let rec findUp ( startDir : string ) ( marker : string ) =
603+ let full = Path.GetFullPath( startDir)
604+ let candidate = Path.Combine( full, marker)
605+ if File.Exists( candidate) then full else
606+ let parent = Directory.GetParent( full)
607+ if isNull parent then failwithf $" Could not locate '%s {marker}' above '%s {startDir}'" else
608+ findUp parent.FullName marker
609+ findUp AppContext.BaseDirectory " ancpdevkit.sln"
610+
611+ let proj = Path.Combine( repoRoot, " tools" , " ancp" , " f" , " f.fsproj" )
612+ let psi = ProcessStartInfo( " dotnet" , $" run --no-build --project {proj} --framework net9.0 -- {docxPath}" )
613+ psi.WorkingDirectory <- repoRoot
614+ psi.RedirectStandardOutput <- true
615+ psi.RedirectStandardError <- true
616+ use p = Process.Start( psi)
617+ let _outStr = p.StandardOutput.ReadToEnd()
618+ let errStr = p.StandardError.ReadToEnd()
619+ p.WaitForExit()
620+
621+ Assert.That( p.ExitCode, Is.EqualTo( 1 ))
622+ Assert.That( errStr.Contains( " Error:" ), Is.True)
623+ finally
624+ try Directory.Delete( tmp, true ) with _ -> ()
0 commit comments