@@ -3230,7 +3230,7 @@ def _map_tool_definition(f: ToolDefinition) -> ChatCompletionInputTool:
3230
3230
assert "from huggingface_hub import AsyncInferenceClient, ChatCompletionInputTool" not in new_code # conditional from import
3231
3231
3232
3232
3233
- def test_test ():
3233
+ def test_duplicate_global_assignments_when_reverting_helpers ():
3234
3234
root_dir = Path (__file__ ).parent .parent .resolve ()
3235
3235
main_file = Path (root_dir / "code_to_optimize/temp_main.py" ).resolve ()
3236
3236
@@ -3244,42 +3244,14 @@ def test_test():
3244
3244
3245
3245
import regex
3246
3246
from typing_extensions import Self, TypeAlias
3247
-
3248
- from unstructured.common.html_table import HtmlCell, HtmlRow, HtmlTable
3249
- from unstructured.documents.elements import (
3250
- CompositeElement,
3251
- ConsolidationStrategy,
3252
- Element,
3253
- ElementMetadata,
3254
- Table,
3255
- TableChunk,
3256
- Title,
3257
- )
3258
3247
from unstructured.utils import lazyproperty
3248
+ from unstructured.documents.elements import Element
3259
3249
3260
3250
# ================================================================================================
3261
3251
# MODEL
3262
3252
# ================================================================================================
3263
3253
3264
3254
CHUNK_MAX_CHARS_DEFAULT: int = 500
3265
- """Hard-max chunk-length when no explicit value specified in `max_characters` argument.
3266
-
3267
- Provided for reference only, for example so the ingest CLI can advertise the default value in its
3268
- UI. External chunking-related functions (e.g. in ingest or decorators) should use
3269
- `max_characters: int | None = None` and not apply this default themselves. Only
3270
- `ChunkingOptions.max_characters` should apply a default value.
3271
- """
3272
-
3273
- CHUNK_MULTI_PAGE_DEFAULT: bool = True
3274
- """When False, respect page-boundaries (no two elements from different page in same chunk).
3275
-
3276
- Only operative for "by_title" chunking strategy.
3277
- """
3278
-
3279
- BoundaryPredicate: TypeAlias = Callable[[Element], bool]
3280
- """Detects when element represents crossing a semantic boundary like section or page."""
3281
-
3282
- TextAndHtml: TypeAlias = tuple[str, str]
3283
3255
3284
3256
# ================================================================================================
3285
3257
# PRE-CHUNKER
@@ -3395,42 +3367,14 @@ def _is_in_new_semantic_unit(self, element: Element) -> bool:
3395
3367
3396
3368
import regex
3397
3369
from typing_extensions import Self, TypeAlias
3398
-
3399
- from unstructured.common.html_table import HtmlCell, HtmlRow, HtmlTable
3400
- from unstructured.documents.elements import (
3401
- CompositeElement,
3402
- ConsolidationStrategy,
3403
- Element,
3404
- ElementMetadata,
3405
- Table,
3406
- TableChunk,
3407
- Title,
3408
- )
3409
3370
from unstructured.utils import lazyproperty
3371
+ from unstructured.documents.elements import Element
3410
3372
3411
3373
# ================================================================================================
3412
3374
# MODEL
3413
3375
# ================================================================================================
3414
3376
3415
3377
CHUNK_MAX_CHARS_DEFAULT: int = 500
3416
- """Hard-max chunk-length when no explicit value specified in `max_characters` argument.
3417
-
3418
- Provided for reference only, for example so the ingest CLI can advertise the default value in its
3419
- UI. External chunking-related functions (e.g. in ingest or decorators) should use
3420
- `max_characters: int | None = None` and not apply this default themselves. Only
3421
- `ChunkingOptions.max_characters` should apply a default value.
3422
- """
3423
-
3424
- CHUNK_MULTI_PAGE_DEFAULT: bool = True
3425
- """When False, respect page-boundaries (no two elements from different page in same chunk).
3426
-
3427
- Only operative for "by_title" chunking strategy.
3428
- """
3429
-
3430
- BoundaryPredicate: TypeAlias = Callable[[Element], bool]
3431
- """Detects when element represents crossing a semantic boundary like section or page."""
3432
-
3433
- TextAndHtml: TypeAlias = tuple[str, str]
3434
3378
3435
3379
# ================================================================================================
3436
3380
# PRE-CHUNKER
0 commit comments