@@ -489,6 +489,7 @@ def test_parallel_mode_passes_params(monkeypatch):
489489 "multipage_sections" : False ,
490490 "combine_under_n_chars" : 501 ,
491491 "new_after_n_chars" : 1501 ,
492+ "max_characters" : 1502 ,
492493 },
493494 )
494495
@@ -511,6 +512,7 @@ def test_parallel_mode_passes_params(monkeypatch):
511512 multipage_sections = False ,
512513 combine_under_n_chars = 501 ,
513514 new_after_n_chars = 1501 ,
515+ max_characters = 1502 ,
514516 )
515517
516518
@@ -649,56 +651,45 @@ def test_chunking_strategy_param():
649651 assert "CompositeElement" in [element .get ("type" ) for element in response_with_chunking ]
650652
651653
652- # def test_chunking_strategy_additional_params():
653- # client = TestClient(app)
654- # test_file = Path("sample-docs") / "layout-parser-paper-fast.pdf"
655- # res = client.post(
656- # MAIN_API_ROUTE,
657- # files=[("files", (str(test_file), open(test_file, "rb")))],
658- # data={
659- # "chunking_strategy": "by_title",
660- # "multipage_sections": "False",
661- # "combine_under_n_chars": "0",
662- # },
663- # )
664- # response_from_multipage_false_combine_chars_0 = res.json()
665-
666- # res = client.post(
667- # MAIN_API_ROUTE,
668- # files=[("files", (str(test_file), open(test_file, "rb")))],
669- # data={
670- # "chunking_strategy": "by_title",
671- # "multipage_sections": "True",
672- # "combine_under_n_chars": "0",
673- # },
674- # )
675- # response_from_multipage_true_combine_chars_0 = res.json()
676-
677- # res = client.post(
678- # MAIN_API_ROUTE,
679- # files=[("files", (str(test_file), open(test_file, "rb")))],
680- # data={
681- # "chunking_strategy": "by_title",
682- # "multipage_sections": "True",
683- # "combine_under_n_chars": "5000",
684- # # Defining new_after_n_chars since it has to be greater than combine_under_n_chars
685- # "new_after_n_chars": "50000",
686- # },
687- # )
688- # response_multipage_true_combine_chars_5000 = res.json()
689-
690- # assert (
691- # response_multipage_true_combine_chars_5000
692- # != response_from_multipage_true_combine_chars_0
693- # )
694- # assert (
695- # response_from_multipage_true_combine_chars_0
696- # != response_from_multipage_false_combine_chars_0
697- # )
698- # assert (
699- # response_multipage_true_combine_chars_5000
700- # != response_from_multipage_false_combine_chars_0
701- # )
# Defaults assumed by the cases below (per the original author's note; confirm
# against the API if they change):
#   multipage_sections = True, combine_under_n_chars = None,
#   new_after_n_chars = None, max_characters = 500
@pytest.mark.parametrize(
    ("multipage_sections", "combine_under_n_chars", "new_after_n_chars", "max_characters"),
    [
        (False, None, None, 500),  # test multipage_sections
        (True, 1000, None, 5000),  # test combine_under_n_chars
        (True, None, 10, 500),  # test new_after_n_chars
        (True, None, None, 100),  # test max_characters
    ],
)
def test_chunking_strategy_additional_params(
    multipage_sections, combine_under_n_chars, new_after_n_chars, max_characters
):
    """Check that each extra ``by_title`` chunking parameter changes the output.

    Every parametrized case overrides one chunking option, posts the sample PDF
    with that override, and compares the JSON body against a request that only
    sets ``chunking_strategy``. The two bodies must differ.

    Args:
        multipage_sections: Value sent for the ``multipage_sections`` form field.
        combine_under_n_chars: Value sent for ``combine_under_n_chars``; ``None``
            means the field is left out of the request.
        new_after_n_chars: Value sent for ``new_after_n_chars``; ``None`` means
            the field is left out of the request.
        max_characters: Value sent for the ``max_characters`` form field.
    """
    client = TestClient(app)
    test_file = Path("sample-docs") / "layout-parser-paper-fast.pdf"

    def post_by_title(extra_fields):
        """POST the sample PDF with by_title chunking plus ``extra_fields``."""
        form_data = {"chunking_strategy": "by_title"}
        # Leave out None-valued fields so the server falls back to its own
        # default; HTTP clients differ in how they encode a None form value.
        form_data.update(
            {name: value for name, value in extra_fields.items() if value is not None}
        )
        # The context manager closes the upload handle once the request is done;
        # the previous inline open() calls were never closed.
        with open(test_file, "rb") as pdf:
            response = client.post(
                MAIN_API_ROUTE,
                files=[("files", (str(test_file), pdf))],
                data=form_data,
            )
        # Without this, an error payload on either side would make the final
        # inequality pass for the wrong reason.
        assert response.status_code == 200
        return response.json()

    arg_resp_json = post_by_title(
        {
            "multipage_sections": multipage_sections,
            "combine_under_n_chars": combine_under_n_chars,
            "new_after_n_chars": new_after_n_chars,
            "max_characters": max_characters,
        }
    )
    default_resp_json = post_by_title({})

    assert arg_resp_json != default_resp_json
702693
703694
704695def test_encrypted_pdf ():
0 commit comments