|
1 | 1 | import os.path as osp |
| 2 | +from typing import List |
2 | 3 |
|
3 | | -from clarifai_datautils.multimodal import Pipeline |
| 4 | +import pytest |
| 5 | +from schema import SchemaError |
| 6 | + |
| 7 | +from clarifai_datautils.multimodal import PDFPartition, Pipeline |
| 8 | +from clarifai_datautils.multimodal.pipeline.cleaners import Clean_extra_whitespace |
| 9 | +from clarifai_datautils.multimodal.pipeline.extractors import (ExtractDateTimeTz, |
| 10 | + ExtractEmailAddress) |
4 | 11 |
|
# Test fixtures live in the "assets" directory next to this module.
_ASSETS_DIR = osp.join(osp.dirname(__file__), "assets")

# Absolute paths to the sample documents used by the tests.
PDF_FILE_PATH = osp.abspath(osp.join(_ASSETS_DIR, "DA-1p.pdf"))
TEXT_FILE_PATH = osp.abspath(osp.join(_ASSETS_DIR, "book-war-and-peace-1p.txt"))
8 | 15 |
|
9 | 16 |
|
class Test_transformation():
    """Stub callable used as an invalid pipeline transformation in tests.

    It is a plain class with no base class; the schema-error test appends an
    instance of it to a pipeline's transformations and expects construction
    to be rejected.
    """

    # The class name matches pytest's default ``Test*`` collection pattern,
    # and a class that defines ``__init__`` cannot be collected: pytest emits
    # a PytestCollectionWarning for it. Opt out of collection explicitly.
    __test__ = False

    def __init__(self):
        pass

    def __call__(self,) -> List:
        """Applies the transformation.

        Returns:
            An empty list; the stub performs no work. (Previously this
            returned ``None``, contradicting the ``List`` annotation.)
        """
        return []
| 26 | + |
| 27 | + |
10 | 28 | class TestReadyToUsePipelines: |
11 | 29 | """Tests for ready to use pipelines.""" |
12 | 30 |
|
@@ -77,3 +95,38 @@ def test_pipeline_standard_markdown(self,): |
77 | 95 | assert pipeline.transformations[0].__class__.__name__ == 'MarkdownPartition' |
78 | 96 | assert pipeline.transformations[1].__class__.__name__ == 'Clean_extra_whitespace' |
79 | 97 | assert pipeline.transformations[2].__class__.__name__ == 'Group_broken_paragraphs' |
| 98 | + |
def test_schema_error(self):
    """Check that malformed ``transformations`` make ``Pipeline`` raise ``SchemaError``."""
    # Case 1: the transformations are supplied as a tuple
    # (the other cases below pass a list).
    with pytest.raises(SchemaError):
        Pipeline(
            name='test-1',
            transformations=(
                PDFPartition(max_characters=1024, overlap=None),
                Clean_extra_whitespace(),
                ExtractDateTimeTz(),
                ExtractEmailAddress(),
            ),
        )

    # Case 2: a cleaner comes first and the partitioner second,
    # i.e. the first transformation is the wrong one.
    with pytest.raises(SchemaError):
        Pipeline(
            name='test-2',
            transformations=[
                Clean_extra_whitespace(),
                PDFPartition(max_characters=1024, overlap=None),
                ExtractDateTimeTz(),
                ExtractEmailAddress(),
            ],
        )

    # Case 3: the list ends with a Test_transformation instance, which is
    # expected to be rejected as an incorrect transformation instance.
    with pytest.raises(SchemaError):
        Pipeline(
            name='test-3',
            transformations=[
                PDFPartition(max_characters=1024, overlap=None),
                Clean_extra_whitespace(),
                ExtractDateTimeTz(),
                ExtractEmailAddress(),
                Test_transformation(),
            ],
        )
0 commit comments