@@ -202,5 +202,161 @@ def test_autocomplete_nonexistent_first_token(self):
202202 assert response .get_json () == []
203203
204204
205+ # ---------------------------------------------------------------------------
206+ # CLI command tests: export-data / import-data
207+ # ---------------------------------------------------------------------------
208+
class TestExportData:
    """Tests for the ``flask export-data`` CLI command.

    Every test points ``app.PDF_DIR`` at a location under pytest's
    ``tmp_path`` and passes an output path under ``tmp_path`` as well, so the
    command only ever works on throwaway files.
    """

    @staticmethod
    def _run_export(pdf_dir, output_file):
        """Invoke ``export-data`` with ``app.PDF_DIR`` patched to *pdf_dir*.

        Args:
            pdf_dir: Path substituted for ``app.PDF_DIR`` during the call.
            output_file: Path passed to the command as its only argument.

        Returns:
            The Click ``Result`` object produced by the invocation.
        """
        import app as app_module

        # Deliberately no ``mix_stderr`` argument: Click 8.2 removed it from
        # ``CliRunner`` and passing it raises ``TypeError``. The tests in this
        # class only check that a message is contained in ``result.output``,
        # which holds on Click versions before and after that change.
        runner = app_module.app.test_cli_runner()
        with patch.object(app_module, "PDF_DIR", pdf_dir):
            return runner.invoke(args=["export-data", str(output_file)])

    def test_export_creates_tarball_with_pdfs(self, tmp_path):
        """Exporting should create a tar.gz containing all PDFs."""
        import tarfile

        pdf_dir = tmp_path / "pdfs"
        pdf_dir.mkdir()
        (pdf_dir / "report-a.pdf").write_bytes(b"%PDF-fake-a")
        (pdf_dir / "report-b.pdf").write_bytes(b"%PDF-fake-b")
        output_file = tmp_path / "export.tar.gz"

        result = self._run_export(pdf_dir, output_file)

        assert result.exit_code == 0
        assert "Exported 2 PDFs" in result.output

        # Members are expected under their bare file names (no directory part).
        with tarfile.open(str(output_file), "r:gz") as tar:
            names = sorted(tar.getnames())
        assert names == ["report-a.pdf", "report-b.pdf"]

    def test_export_no_pdfs_found(self, tmp_path):
        """Exporting an empty PDF directory should report it and write nothing."""
        pdf_dir = tmp_path / "pdfs"
        pdf_dir.mkdir()
        output_file = tmp_path / "export.tar.gz"

        result = self._run_export(pdf_dir, output_file)

        assert result.exit_code == 0
        assert "No PDF files found" in result.output
        assert not output_file.exists()

    def test_export_missing_directory(self, tmp_path):
        """Exporting when the PDF directory doesn't exist should print an error."""
        # The directory is intentionally never created.
        nonexistent = tmp_path / "nonexistent"
        output_file = tmp_path / "export.tar.gz"

        result = self._run_export(nonexistent, output_file)

        # The command reports the problem in its output but still exits with 0.
        assert result.exit_code == 0
        assert "does not exist" in result.output
265+
class TestImportData:
    """Tests for the ``flask import-data`` CLI command.

    Every test points ``app.PDF_DIR`` at a location under pytest's
    ``tmp_path`` so the command never works on the real PDF directory.
    """

    @staticmethod
    def _make_archive(tmp_path, files):
        """Create ``import.tar.gz`` under *tmp_path* containing *files*.

        Args:
            tmp_path: Base directory for the source files and the archive.
            files: Mapping of archive member name to its byte content.

        Returns:
            Path of the created gzip-compressed tar archive.
        """
        import tarfile

        src_dir = tmp_path / "src"
        src_dir.mkdir()
        archive_path = tmp_path / "import.tar.gz"

        with tarfile.open(str(archive_path), "w:gz") as tar:
            for name, content in files.items():
                source = src_dir / name
                source.write_bytes(content)
                # ``arcname`` stores the member under its bare file name
                # instead of the absolute temp path.
                tar.add(str(source), arcname=name)
        return archive_path

    @staticmethod
    def _run_cli(command, archive_path, pdf_dir):
        """Invoke *command* with ``app.PDF_DIR`` patched to *pdf_dir*.

        Args:
            command: CLI command name (``"import-data"`` or ``"export-data"``).
            archive_path: Path passed to the command as its only argument.
            pdf_dir: Path substituted for ``app.PDF_DIR`` during the call.

        Returns:
            The Click ``Result`` object produced by the invocation.
        """
        import app as app_module

        # Deliberately no ``mix_stderr`` argument: Click 8.2 removed it from
        # ``CliRunner`` and passing it raises ``TypeError``. The tests in this
        # class only check that a message is contained in ``result.output``,
        # which holds on Click versions before and after that change.
        runner = app_module.app.test_cli_runner()
        with patch.object(app_module, "PDF_DIR", pdf_dir):
            return runner.invoke(args=[command, str(archive_path)])

    def test_import_extracts_pdfs(self, tmp_path):
        """Importing should extract PDFs from the tarball."""
        pdf_content_a = b"%PDF-fake-a"
        pdf_content_b = b"%PDF-fake-b"
        archive_path = self._make_archive(
            tmp_path,
            {"report-a.pdf": pdf_content_a, "report-b.pdf": pdf_content_b},
        )
        # The destination is intentionally not created beforehand.
        dest_dir = tmp_path / "data_pdfs"

        result = self._run_cli("import-data", archive_path, dest_dir)

        assert result.exit_code == 0
        assert "Imported 2 PDFs" in result.output
        assert (dest_dir / "report-a.pdf").read_bytes() == pdf_content_a
        assert (dest_dir / "report-b.pdf").read_bytes() == pdf_content_b

    def test_import_skips_non_pdf_files(self, tmp_path):
        """Importing should only extract .pdf files, ignoring others."""
        archive_path = self._make_archive(
            tmp_path,
            {"report.pdf": b"%PDF-fake", "readme.txt": b"not a pdf"},
        )
        dest_dir = tmp_path / "data_pdfs"

        result = self._run_cli("import-data", archive_path, dest_dir)

        assert result.exit_code == 0
        assert "Imported 1 PDFs" in result.output
        assert (dest_dir / "report.pdf").exists()
        assert not (dest_dir / "readme.txt").exists()

    def test_import_missing_archive(self, tmp_path):
        """Importing a nonexistent file should print an error."""
        # PDF_DIR is patched here as in every other test, so the command
        # cannot touch the real PDF directory whatever it does before
        # noticing that the archive is missing.
        result = self._run_cli(
            "import-data",
            tmp_path / "nonexistent.tar.gz",
            tmp_path / "data_pdfs",
        )

        # The command reports the problem in its output but still exits with 0.
        assert result.exit_code == 0
        assert "does not exist" in result.output

    def test_export_then_import_roundtrip(self, tmp_path):
        """Export and then import should produce identical files."""
        src_pdf_dir = tmp_path / "src_pdfs"
        src_pdf_dir.mkdir()
        pdf_content = b"%PDF-roundtrip-test"
        (src_pdf_dir / "vsbericht-bund-2020.pdf").write_bytes(pdf_content)
        archive_path = tmp_path / "roundtrip.tar.gz"

        # Export from the source directory ...
        result = self._run_cli("export-data", archive_path, src_pdf_dir)
        assert result.exit_code == 0

        # ... then import into a different directory.
        dest_pdf_dir = tmp_path / "dest_pdfs"
        result = self._run_cli("import-data", archive_path, dest_pdf_dir)
        assert result.exit_code == 0

        assert (dest_pdf_dir / "vsbericht-bund-2020.pdf").read_bytes() == pdf_content
360+
# Running this file directly hands it to pytest in verbose mode.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
0 commit comments