Skip to content

Commit 41cb7a7

Browse files
rbiseck3 and benjats07 authored
rename large model to chipper (#145)
* rename large model to chipper --------- Co-authored-by: Benjamin Torres <[email protected]>
1 parent e682ab1 commit 41cb7a7

File tree

5 files changed

+23
-22
lines changed

5 files changed

+23
-22
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
## 0.5.5-dev3
1+
## 0.5.5
22

3+
* Rename large model to chipper
34
* Added functionality to write images to computer storage temporarily instead of keeping them in memory for `pdf2image.convert_from_path`
45
* Added functionality to convert a PDF in small chunks of pages at a time for `pdf2image.convert_from_path`
56
* Table processing check for the area of the package to fix division by zero bug

test_unstructured_inference/models/test_largemodel.py renamed to test_unstructured_inference/models/test_chippermodel.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,24 @@
33
import pytest
44
from PIL import Image
55

6-
from unstructured_inference.models import largemodel
6+
from unstructured_inference.models import chipper
77

88

99
def test_initialize():
1010
with mock.patch.object(
11-
largemodel.AutoTokenizer,
11+
chipper.AutoTokenizer,
1212
"from_pretrained",
1313
) as mock_tokenizer, mock.patch.object(
14-
largemodel,
14+
chipper,
1515
"DonutProcessor",
1616
) as mock_donut_processor, mock.patch.object(
17-
largemodel,
17+
chipper,
1818
"DonutImageProcessor",
1919
) as mock_donut_image_processor, mock.patch.object(
20-
largemodel.VisionEncoderDecoderModel,
20+
chipper.VisionEncoderDecoderModel,
2121
"from_pretrained",
2222
) as mock_vision_encoder_decoder_model:
23-
model = largemodel.UnstructuredLargeModel()
23+
model = chipper.UnstructuredChipperModel()
2424
model.initialize("", "", "")
2525
mock_tokenizer.assert_called_once()
2626
mock_donut_processor.assert_called_once()
@@ -44,8 +44,8 @@ def mock_initialize(self, *arg, **kwargs):
4444

4545

4646
def test_predict_tokens():
47-
with mock.patch.object(largemodel.UnstructuredLargeModel, "initialize", mock_initialize):
48-
model = largemodel.UnstructuredLargeModel()
47+
with mock.patch.object(chipper.UnstructuredChipperModel, "initialize", mock_initialize):
48+
model = chipper.UnstructuredChipperModel()
4949
model.initialize()
5050
with open("sample-docs/loremipsum.png", "rb") as fp:
5151
im = Image.open(fp)
@@ -64,9 +64,9 @@ def test_predict_tokens():
6464
],
6565
)
6666
def test_postprocess(decoded_str, expected_classes):
67-
with mock.patch.object(largemodel.UnstructuredLargeModel, "initialize", mock_initialize):
67+
with mock.patch.object(chipper.UnstructuredChipperModel, "initialize", mock_initialize):
6868
pass
69-
model = largemodel.UnstructuredLargeModel()
69+
model = chipper.UnstructuredChipperModel()
7070
tokenizer_model = "xlm-roberta-large"
7171
pre_trained_model = "nielsr/donut-base"
7272
model.initialize(tokenizer_model, pre_trained_model, None)
@@ -81,13 +81,13 @@ def test_postprocess(decoded_str, expected_classes):
8181

8282
def test_predict():
8383
with mock.patch.object(
84-
largemodel.UnstructuredLargeModel,
84+
chipper.UnstructuredChipperModel,
8585
"predict_tokens",
8686
) as mock_predict_tokens, mock.patch.object(
87-
largemodel.UnstructuredLargeModel,
87+
chipper.UnstructuredChipperModel,
8888
"postprocess",
8989
) as mock_postprocess:
90-
model = largemodel.UnstructuredLargeModel()
90+
model = chipper.UnstructuredChipperModel()
9191
model.predict("hello")
9292
mock_predict_tokens.assert_called_once()
9393
mock_postprocess.assert_called_once()
unstructured_inference/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.5-dev3" # pragma: no cover
1+
__version__ = "0.5.5" # pragma: no cover

unstructured_inference/models/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Optional
22

3+
from unstructured_inference.models.chipper import MODEL_TYPES as CHIPPER_MODEL_TYPES
4+
from unstructured_inference.models.chipper import UnstructuredChipperModel
35
from unstructured_inference.models.detectron2 import (
46
MODEL_TYPES as DETECTRON2_MODEL_TYPES,
57
)
@@ -12,8 +14,6 @@
1214
from unstructured_inference.models.detectron2onnx import (
1315
UnstructuredDetectronONNXModel,
1416
)
15-
from unstructured_inference.models.largemodel import MODEL_TYPES as LARGE_MODEL_TYPES
16-
from unstructured_inference.models.largemodel import UnstructuredLargeModel
1717
from unstructured_inference.models.unstructuredmodel import UnstructuredModel
1818
from unstructured_inference.models.yolox import (
1919
MODEL_TYPES as YOLOX_MODEL_TYPES,
@@ -41,9 +41,9 @@ def get_model(model_name: Optional[str] = None) -> UnstructuredModel:
4141
elif model_name in YOLOX_MODEL_TYPES:
4242
model = UnstructuredYoloXModel()
4343
model.initialize(**YOLOX_MODEL_TYPES[model_name])
44-
elif model_name in LARGE_MODEL_TYPES:
45-
model = UnstructuredLargeModel()
46-
model.initialize(**LARGE_MODEL_TYPES[model_name])
44+
elif model_name in CHIPPER_MODEL_TYPES:
45+
model = UnstructuredChipperModel()
46+
model.initialize(**CHIPPER_MODEL_TYPES[model_name])
4747
else:
4848
raise UnknownModelException(f"Unknown model type: {model_name}")
4949
return model

unstructured_inference/models/largemodel.py renamed to unstructured_inference/models/chipper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from unstructured_inference.models.unstructuredmodel import UnstructuredElementExtractionModel
1616

1717
MODEL_TYPES = {
18-
"large_model": {
18+
"chipper": {
1919
"tokenizer_name": "xlm-roberta-large",
2020
"pre_trained_model_name": "unstructuredio/ved-fine-tuning",
2121
},
@@ -57,7 +57,7 @@
5757
]
5858

5959

60-
class UnstructuredLargeModel(UnstructuredElementExtractionModel):
60+
class UnstructuredChipperModel(UnstructuredElementExtractionModel):
6161
required_w: int = 1248
6262
required_h: int = 1664
6363

0 commit comments

Comments
 (0)