Skip to content

Commit 17e89be

Browse files
authored
Ingest: Discord source connector (#587)
1 parent e635a45 commit 17e89be

File tree

7 files changed

+113
-0
lines changed

7 files changed

+113
-0
lines changed

docs.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@
302302
"ingestion/source-connectors/couchbase",
303303
"ingestion/source-connectors/databricks-volumes",
304304
"ingestion/source-connectors/delta-table",
305+
"ingestion/source-connectors/discord",
305306
"ingestion/source-connectors/dropbox",
306307
"ingestion/source-connectors/elastic-search",
307308
"ingestion/source-connectors/github",
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
---
2+
title: Discord
3+
---
4+
5+
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
6+
7+
<NewDocument />
8+
9+
import SharedContentDiscord from '/snippets/sc-shared-text/discord-cli-api.mdx';
10+
11+
<SharedContentDiscord/>
12+
13+
Now call the Unstructured CLI or Python. The destination connector can be any of the supported destination connectors. This example uses the local destination connector.
14+
15+
This example sends data to Unstructured for processing by default. To process data locally instead, see the instructions at the end of this page.
16+
17+
import DiscordSh from '/snippets/source_connectors/discord.sh.mdx';
18+
import DiscordPyV2 from '/snippets/source_connectors/discord.v2.py.mdx';
19+
20+
<CodeGroup>
21+
<DiscordSh />
22+
<DiscordPyV2 />
23+
</CodeGroup>
24+
25+
import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
26+
27+
<SharedPartitionByAPIOSS/>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
The Discord connector dependencies:
2+
3+
```bash CLI, Python
4+
pip install "unstructured-ingest[discord]"
5+
```
6+
7+
import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-dependencies.mdx';
8+
9+
<AdditionalIngestDependencies />
10+
11+
The following environment variables:
12+
13+
- `DISCORD_ACCESS_TOKEN` - The Discord access token, represented by `--token` (CLI) or `token` (Python).
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- A Discord [access token](https://discord.com/developers/docs/topics/oauth2).
2+
- The target Discord [channel IDs](https://support.discord.com/hc/articles/206346498-Where-can-I-find-my-User-Server-Message-ID#h_01HRSTXPS5FMK2A5SMVSX4JW4E).
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Connect Discord to your preprocessing pipeline, and use the Unstructured CLI or Python to batch process all your documents and store structured outputs locally on your filesystem.
2+
3+
The requirements are as follows.
4+
5+
import SharedDiscord from '/snippets/general-shared-text/discord.mdx';
6+
import SharedDiscordCLIAPI from '/snippets/general-shared-text/discord-cli-api.mdx';
7+
8+
<SharedDiscord />
9+
<SharedDiscordCLIAPI />
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
```bash CLI
2+
#!/usr/bin/env bash
3+
4+
unstructured-ingest \
5+
discord \
6+
--token $DISCORD_ACCESS_TOKEN \
7+
--channels 475182341782896651,847950191108554762 \
8+
--output-dir $LOCAL_FILE_OUTPUT_DIR \
9+
--partition-by-api \
10+
--api-key $UNSTRUCTURED_API_KEY \
11+
--partition-endpoint $UNSTRUCTURED_API_URL
12+
```
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
```python Python Ingest
2+
import os
3+
4+
from unstructured_ingest.pipeline.pipeline import Pipeline
5+
from unstructured_ingest.interfaces import ProcessorConfig
6+
7+
from unstructured_ingest.processes.connectors.discord import (
8+
DiscordIndexerConfig,
9+
DiscordDownloaderConfig,
10+
DiscordConnectionConfig,
11+
DiscordAccessConfig
12+
)
13+
14+
from unstructured_ingest.processes.partitioner import PartitionerConfig
15+
from unstructured_ingest.processes.chunker import ChunkerConfig
16+
from unstructured_ingest.processes.embedder import EmbedderConfig
17+
18+
from unstructured_ingest.processes.connectors.local import LocalUploaderConfig
19+
20+
# Chunking and embedding are optional.
21+
22+
if __name__ == "__main__":
23+
Pipeline.from_configs(
24+
context=ProcessorConfig(),
25+
indexer_config=DiscordIndexerConfig(
26+
channels=[
27+
"475182341782896651",
28+
"847950191108554762"
29+
]
30+
),
31+
downloader_config=DiscordDownloaderConfig(download_dir=os.getenv("LOCAL_FILE_DOWNLOAD_DIR")),
32+
source_connection_config=DiscordConnectionConfig(
33+
access_config=DiscordAccessConfig(token=os.getenv("DISCORD_ACCESS_TOKEN"))
34+
),
35+
partitioner_config=PartitionerConfig(
36+
partition_by_api=True,
37+
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
38+
partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
39+
additional_partition_args={
40+
"split_pdf_page": True,
41+
"split_pdf_allow_failed": True,
42+
"split_pdf_concurrency_level": 15
43+
}
44+
),
45+
chunker_config=ChunkerConfig(chunking_strategy="by_title"),
46+
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
47+
uploader_config=LocalUploaderConfig(output_dir=os.getenv("LOCAL_FILE_OUTPUT_DIR"))
48+
).run()
49+
```

0 commit comments

Comments
 (0)