Skip to content

Commit 088ec34

Browse files
authored
fix ndjson in local chunker (#447)
1 parent 9a09113 commit 088ec34

File tree

3 files changed

+12
-3
lines changed

3 files changed

+12
-3
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## 0.6.1
2+
3+
### Fixes
4+
5+
* **Handle NDJSON element files when using the local chunker**
6+
17
## 0.6.0
28

39
### Features

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.6.0" # pragma: no cover
1+
__version__ = "0.6.1" # pragma: no cover

unstructured_ingest/v2/processes/chunker.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pydantic import BaseModel, Field, SecretStr
77

88
from unstructured_ingest.utils.chunking import assign_and_map_hash_ids
9+
from unstructured_ingest.utils.data_prep import get_json_data
910
from unstructured_ingest.utils.dep_check import requires_dependencies
1011
from unstructured_ingest.v2.interfaces.process import BaseProcess
1112
from unstructured_ingest.v2.logger import logger
@@ -92,9 +93,11 @@ def is_async(self) -> bool:
9293
@requires_dependencies(dependencies=["unstructured"])
9394
def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
9495
from unstructured.chunking import dispatch
95-
from unstructured.staging.base import elements_from_json
96+
from unstructured.staging.base import elements_from_dicts
9697

97-
elements = elements_from_json(filename=str(elements_filepath))
98+
element_dicts = get_json_data(elements_filepath)
99+
100+
elements = elements_from_dicts(element_dicts=element_dicts)
98101
if not elements:
99102
return [e.to_dict() for e in elements]
100103
local_chunking_strategies = ("basic", "by_title")

0 commit comments

Comments
 (0)