Commit 02e6fc4

Fix: Update examples to acknowledge actual folder structure; remove incorrect logging (#564)
For some reason, all examples pointed to folders too high in the directory hierarchy, so they always failed. This PR changes them to point to the actual example data. It also removes logging calls that never worked: with the current logger approach they lacked a handler and never logged anything.
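As a rough sketch of the path fix (assuming the example scripts live in examples/ at the repository root and the sample documents in example-docs/, as the diffs below suggest; file names here are hypothetical):

from pathlib import Path

# Hypothetical layout: <repo>/examples/airtable.py and <repo>/example-docs/
old_base_path = Path(__file__).parent.parent.parent.parent  # climbs four levels: two directories above the checkout
new_base_path = Path(__file__).parent.parent                # one level above examples/ is the repository root
docs_path = new_base_path / "example-docs"                  # now resolves to a folder that actually exists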
1 parent 3c527df commit 02e6fc4

27 files changed: +55 -98 lines

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
@@ -1,6 +1,10 @@
+## 1.1.1
+
+* **Fix: Update examples**
+
 ## 1.1.0
 
-- **Feature**: Embedding with OpenAI (or Azure OpenAI) can trust custom certificate authority by specifying environment variable REQUESTS_CA_BUNDLE.
+* **Feature**: Embedding with OpenAI (or Azure OpenAI) can trust custom certificate authority by specifying environment variable REQUESTS_CA_BUNDLE.
 
 ## 1.0.59
 

examples/airtable.py

Lines changed: 1 addition & 3 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.airtable import (
@@ -18,14 +17,13 @@
 from unstructured_ingest.processes.embedder import EmbedderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
         context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=AirtableIndexerConfig(
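The removed logger.info(...) calls above illustrate the second half of the fix. A minimal stdlib sketch of the mechanism (assuming the library logger simply had no handler attached anywhere, per the PR description; this is not the library's actual logger setup):

import logging

logger = logging.getLogger("demo")       # no handler configured anywhere
logger.info("silently dropped")          # only logging.lastResort handles it, and it filters below WARNING

logging.basicConfig(level=logging.INFO)  # attach a handler to the root logger
logger.info("now this message appears")  # propagates to root and is emitted

Since the calls never produced output, the examples drop them, and several also enable verbose=True on ProcessorConfig, as the diffs below show.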

examples/azure_cognitive_search.py

Lines changed: 2 additions & 4 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.azure_ai_search import (
@@ -20,17 +19,16 @@
 from unstructured_ingest.processes.embedder import EmbedderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     index_name = "ingest-test-destination"
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=LocalIndexerConfig(
             input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt"
         ),

examples/chroma.py

Lines changed: 2 additions & 4 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.chroma import (
@@ -20,16 +19,15 @@
 from unstructured_ingest.processes.embedder import EmbedderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=LocalIndexerConfig(input_path=docs_path.resolve() / "multisimple"),
         downloader_config=LocalDownloaderConfig(download_dir=download_path),
         source_connection_config=LocalConnectionConfig(),

examples/couchbase.py

Lines changed: 2 additions & 4 deletions
@@ -1,7 +1,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.couchbase import (
@@ -19,16 +18,15 @@
 from unstructured_ingest.processes.embedder import EmbedderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"),
         downloader_config=LocalDownloaderConfig(download_dir=download_path),
         source_connection_config=LocalConnectionConfig(),

examples/databricks_volumes_dest.py

Lines changed: 2 additions & 4 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.databricks.volumes_native import (
@@ -18,16 +17,15 @@
 )
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/fake-text.txt"),
         downloader_config=LocalDownloaderConfig(download_dir=download_path),
         source_connection_config=LocalConnectionConfig(),

examples/databricks_volumes_source.py

Lines changed: 2 additions & 4 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.databricks.volumes_native import (
@@ -17,16 +16,15 @@
 )
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=DatabricksNativeVolumesIndexerConfig(
             host=os.environ["DATABRICKS_HOST"],
             catalog=os.environ["DATABRICKS_CATALOG"],

examples/delta_table.py

Lines changed: 2 additions & 4 deletions
@@ -1,7 +1,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.delta_table import (
@@ -19,16 +18,15 @@
 from unstructured_ingest.processes.embedder import EmbedderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=LocalIndexerConfig(
             input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt",
         ),

examples/discord_example.py

Lines changed: 1 addition & 3 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.connectors.discord import (
     CONNECTOR_TYPE,
@@ -14,14 +13,13 @@
 from unstructured_ingest.processes.connectors.local import LocalUploaderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     Pipeline.from_configs(
         context=ProcessorConfig(work_dir=str(work_dir.resolve()), tqdm=True, verbose=True),
         indexer_config=DiscordIndexerConfig(channels=os.environ["DISCORD_CHANNELS"].split(",")),

examples/elasticsearch.py

Lines changed: 2 additions & 4 deletions
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 from unstructured_ingest.interfaces import ProcessorConfig
-from unstructured_ingest.logger import logger
 from unstructured_ingest.pipeline.pipeline import Pipeline
 from unstructured_ingest.processes.chunker import ChunkerConfig
 from unstructured_ingest.processes.connectors.elasticsearch import (
@@ -20,17 +19,16 @@
 from unstructured_ingest.processes.embedder import EmbedderConfig
 from unstructured_ingest.processes.partitioner import PartitionerConfig
 
-base_path = Path(__file__).parent.parent.parent.parent
+base_path = Path(__file__).parent.parent
 docs_path = base_path / "example-docs"
 work_dir = base_path / "tmp_ingest" / CONNECTOR_TYPE
 output_path = work_dir / "output"
 download_path = work_dir / "download"
 
 if __name__ == "__main__":
-    logger.info(f"writing all content in: {work_dir.resolve()}")
     index_name = "ingest-test-destination"
     Pipeline.from_configs(
-        context=ProcessorConfig(work_dir=str(work_dir.resolve())),
+        context=ProcessorConfig(work_dir=str(work_dir.resolve()), verbose=True),
         indexer_config=LocalIndexerConfig(
             input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt"
         ),
