Skip to content

Commit 5f4dee4

Browse files
authored
Split SQL v2 destination connector into PostgreSQL v2 and SQLite v2 destination connectors (#292)
1 parent c771571 commit 5f4dee4

File tree

17 files changed

+239
-170
lines changed

17 files changed

+239
-170
lines changed
Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,23 @@
11
---
2-
title: SQL
2+
title: PostgreSQL
33
---
44

55
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
66

77
<NewDocument />
88

9-
import SharedContentSQL from '/snippets/dc-shared-text/sql-cli-api.mdx';
9+
import SharedContentPostgreSQL from '/snippets/dc-shared-text/postgresql-cli-api.mdx';
1010
import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
1111

12-
<SharedContentSQL/>
12+
<SharedContentPostgreSQL/>
1313
<SharedAPIKeyURL/>
1414

1515
Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. You can use any supported source connector. This example uses the local source connector:
1616

17-
import SQLAPISh from '/snippets/destination_connectors/sql.sh.mdx';
18-
import SQLAPIPyV2 from '/snippets/destination_connectors/sql.v2.py.mdx';
19-
import SQLAPIPyV1 from '/snippets/destination_connectors/sql.v1.py.mdx';
17+
import PostgreSQLAPISh from '/snippets/destination_connectors/postgresql.sh.mdx';
18+
import PostgreSQLAPIPyV2 from '/snippets/destination_connectors/postgresql.v2.py.mdx';
2019

2120
<CodeGroup>
22-
<SQLAPISh />
23-
<SQLAPIPyV2 />
24-
<SQLAPIPyV1 />
21+
<PostgreSQLAPISh />
22+
<PostgreSQLAPIPyV2 />
2523
</CodeGroup>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
title: SQLite
3+
---
4+
5+
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
6+
7+
<NewDocument />
8+
9+
import SharedContentSQLite from '/snippets/dc-shared-text/sqlite-cli-api.mdx';
10+
import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
11+
12+
<SharedContentSQLite/>
13+
<SharedAPIKeyURL/>
14+
15+
Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. You can use any supported source connector. This example uses the local source connector:
16+
17+
import SQLiteAPISh from '/snippets/destination_connectors/sqlite.sh.mdx';
18+
import SQLiteAPIPyV2 from '/snippets/destination_connectors/sqlite.v2.py.mdx';
19+
20+
<CodeGroup>
21+
<SQLiteAPISh />
22+
<SQLiteAPIPyV2 />
23+
</CodeGroup>

mint.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,12 @@
199199
"open-source/ingest/destination-connectors/mongodb",
200200
"open-source/ingest/destination-connectors/opensearch",
201201
"open-source/ingest/destination-connectors/pinecone",
202+
"open-source/ingest/destination-connectors/postgresql",
202203
"open-source/ingest/destination-connectors/qdrant",
203204
"open-source/ingest/destination-connectors/s3",
204205
"open-source/ingest/destination-connectors/sftp",
205206
"open-source/ingest/destination-connectors/singlestore",
206-
"open-source/ingest/destination-connectors/sql",
207+
"open-source/ingest/destination-connectors/sqlite",
207208
"open-source/ingest/destination-connectors/vectara",
208209
"open-source/ingest/destination-connectors/weaviate"
209210
]
@@ -348,11 +349,12 @@
348349
"api-reference/ingest/destination-connector/mongodb",
349350
"api-reference/ingest/destination-connector/opensearch",
350351
"api-reference/ingest/destination-connector/pinecone",
352+
"api-reference/ingest/destination-connector/postgresql",
351353
"api-reference/ingest/destination-connector/qdrant",
352354
"api-reference/ingest/destination-connector/s3",
353355
"api-reference/ingest/destination-connector/sftp",
354356
"api-reference/ingest/destination-connector/singlestore",
355-
"api-reference/ingest/destination-connector/sql",
357+
"api-reference/ingest/destination-connector/sqlite",
356358
"api-reference/ingest/destination-connector/vectara",
357359
"api-reference/ingest/destination-connector/weaviate"
358360
]

open-source/ingest/destination-connectors/sql.mdx renamed to open-source/ingest/destination-connectors/postgresql.mdx

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,25 @@
11
---
2-
title: SQL
2+
title: PostgreSQL
33
---
44

55
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
66

77
<NewDocument />
88

9-
import SharedSQL from '/snippets/dc-shared-text/sql-cli-api.mdx';
9+
import SharedPostgreSQL from '/snippets/dc-shared-text/postgresql-cli-api.mdx';
1010

11-
<SharedSQL/>
11+
<SharedPostgreSQL/>
1212

1313
Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. You can use any supported source connector. This example uses the local source connector:
1414

1515
This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page.
1616

17-
import SQLAPISh from '/snippets/destination_connectors/sql.sh.mdx';
18-
import SQLAPIPyV2 from '/snippets/destination_connectors/sql.v2.py.mdx';
19-
import SQLAPIPyV1 from '/snippets/destination_connectors/sql.v1.py.mdx';
17+
import PostgreSQLAPISh from '/snippets/destination_connectors/postgresql.sh.mdx';
18+
import PostgreSQLAPIPyV2 from '/snippets/destination_connectors/postgresql.v2.py.mdx';
2019

2120
<CodeGroup>
22-
<SQLAPISh />
23-
<SQLAPIPyV2 />
24-
<SQLAPIPyV1 />
21+
<PostgreSQLAPISh />
22+
<PostgreSQLAPIPyV2 />
2523
</CodeGroup>
2624

2725
import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
---
2+
title: SQLite
3+
---
4+
5+
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
6+
7+
<NewDocument />
8+
9+
import SharedSQLite from '/snippets/dc-shared-text/sqlite-cli-api.mdx';
10+
11+
<SharedSQLite/>
12+
13+
Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. You can use any supported source connector. This example uses the local source connector:
14+
15+
This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page.
16+
17+
import SQLiteAPISh from '/snippets/destination_connectors/sqlite.sh.mdx';
18+
import SQLiteAPIPyV2 from '/snippets/destination_connectors/sqlite.v2.py.mdx';
19+
20+
<CodeGroup>
21+
<SQLiteAPISh />
22+
<SQLiteAPIPyV2 />
23+
</CodeGroup>
24+
25+
import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
26+
27+
<SharedPartitionByAPIOSS/>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Batch process all your records to store structured outputs in a PostgreSQL schema.
2+
3+
The insert query is currently limited to appending records.
4+
5+
You will need:
6+
7+
import SharedPostgreSQL from '/snippets/general-shared-text/postgresql.mdx';
8+
import SharedPostgreSQLCLIAPI from '/snippets/general-shared-text/postgresql-cli-api.mdx';
9+
10+
<SharedPostgreSQL />
11+
<SharedPostgreSQLCLIAPI />

snippets/dc-shared-text/sql-cli-api.mdx

Lines changed: 0 additions & 11 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Batch process all your records to store structured outputs in a SQLite database.
2+
3+
The insert query is currently limited to appending records.
4+
5+
You will need:
6+
7+
import SharedSQLite from '/snippets/general-shared-text/sqlite.mdx';
8+
import SharedSQLiteCLIAPI from '/snippets/general-shared-text/sqlite-cli-api.mdx';
9+
10+
<SharedSQLite />
11+
<SharedSQLiteCLIAPI />

snippets/destination_connectors/sql.sh.mdx renamed to snippets/destination_connectors/postgresql.sh.mdx

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,10 @@ unstructured-ingest \
2424
--partition-endpoint $UNSTRUCTURED_API_URL \
2525
--metadata-include "$metadata_includes" \
2626
--additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
27-
sql \
28-
--db-type $SQL_DB_TYPE \
27+
postgres \
2928
--username $PGUSER \
3029
--password $PGPASSWORD \
3130
--host $PGHOST \
3231
--port $PGPORT \
3332
--database $PGDATABASE
34-
```
33+
```

snippets/destination_connectors/sql.v2.py.mdx renamed to snippets/destination_connectors/postgresql.v2.py.mdx

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ import os
44
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
55
from unstructured_ingest.v2.interfaces import ProcessorConfig
66

7-
from unstructured_ingest.v2.processes.connectors.sql import (
8-
SQLConnectionConfig,
9-
SQLAccessConfig,
10-
SQLUploaderConfig,
11-
SQLUploadStagerConfig
7+
from unstructured_ingest.v2.processes.connectors.sql.postgres import (
8+
PostgresConnectionConfig,
9+
PostgresAccessConfig,
10+
PostgresUploaderConfig,
11+
PostgresUploadStagerConfig
1212
)
1313
from unstructured_ingest.v2.processes.connectors.local import (
1414
LocalIndexerConfig,
@@ -46,26 +46,22 @@ if __name__ == "__main__":
4646
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
4747
partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
4848
metadata_include=metadata_includes,
49-
strategy="hi_res",
5049
additional_partition_args={
5150
"split_pdf_page": True,
5251
"split_pdf_allow_failed": True,
5352
"split_pdf_concurrency_level": 15
5453
}
5554
),
5655
chunker_config=ChunkerConfig(chunking_strategy="by_title"),
57-
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
58-
destination_connection_config=SQLConnectionConfig(
59-
access_config=SQLAccessConfig(
60-
username=os.getenv("PGUSER"),
61-
password=os.getenv("PGPASSWORD")
62-
),
63-
db_type=os.getenv("SQL_DB_TYPE"),
56+
embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"),
57+
destination_connection_config=PostgresConnectionConfig(
58+
access_config=PostgresAccessConfig(password=os.getenv("PGPASSWORD")),
6459
host=os.getenv("PGHOST"),
6560
port=os.getenv("PGPORT"),
61+
username=os.getenv("PGUSER"),
6662
database=os.getenv("PGDATABASE")
6763
),
68-
stager_config=SQLUploadStagerConfig(),
69-
uploader_config=SQLUploaderConfig()
64+
stager_config=PostgresUploadStagerConfig(),
65+
uploader_config=PostgresUploaderConfig()
7066
).run()
7167
```

0 commit comments

Comments
 (0)