Skip to content

Commit 5773481

Browse files
feat/astradb source connector (#143)
* initial commit
* connector update
* comment out extension
* update expected tests.
* update expected
* astradb source connector updates
* fix downloader
* update fixtures
* bump dev version
* nit
* cleanup
* address comments
* async downloader
* update uploader
* fixes wip
* update response
* tidy
* make deepcopy of fd
* update doc type to file, not csv

---------

Co-authored-by: Shreya Nidadavolu <[email protected]>
Co-authored-by: shreyanid <[email protected]>
1 parent 8445479 commit 5773481

16 files changed

+424
-533
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
## 0.2.1-dev1
1+
## 0.2.1-dev2
22

33
### Enhancements
44

55
* **Add singlestore source connector**
6+
* **Astra DB V2 Source Connector** Create a v2 version of the Astra DB Source Connector.
67

78
### Fixes
89

test_e2e/check-diff-expected-output.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def check_files(expected_output_dir: Path, current_output_dir: Path):
2929
if diff:
3030
print("diff in files that exist: {}".format(", ".join(diff)))
3131
print(f"expected files {expected_files}\nFiles obtained {current_files}")
32+
print('to update test fixtures, "export OVERWRITE_FIXTURES=true" and rerun this script')
3233
raise CheckError("The same files don't exist in both locations")
3334

3435

@@ -42,6 +43,7 @@ def check_contents(expected_output_dir: Path, current_output_dir: Path):
4243
if diffs:
4344
found_diff = True
4445
print(f"diffs between files {expected_file_path} and {current_file_path}")
46+
print('to update test fixtures, "export OVERWRITE_FIXTURES=true" and rerun this script')
4547
for diff in diffs:
4648
print(diff.to_json(indent=2))
4749
if found_diff:
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"type": "Table",
4+
"element_id": "8d3eb636d29f7f6fa847c8eb9595e936",
5+
"text": "\n\n\n_id\ntitle\nreviewid\ncreationdate\ncriticname\noriginalscore\nreviewstate\nreviewtext\n\n\n25b75f1d-a2ea-4c97-b75f-1da2eadc97f7\nCity Hunter: Shinjuku Private Eyes\n2558908\n2019-02-14\nMatt Schley\n2.5/5\nrotten\nThe film's out-of-touch attempts at humor may find them hunting for the reason the franchise was so popular in the first place.\n\n\n",
6+
"metadata": {
7+
"text_as_html": "<table border=\"1\" class=\"dataframe\">\n <tbody>\n <tr>\n <td>_id</td>\n <td>title</td>\n <td>reviewid</td>\n <td>creationdate</td>\n <td>criticname</td>\n <td>originalscore</td>\n <td>reviewstate</td>\n <td>reviewtext</td>\n </tr>\n <tr>\n <td>25b75f1d-a2ea-4c97-b75f-1da2eadc97f7</td>\n <td>City Hunter: Shinjuku Private Eyes</td>\n <td>2558908</td>\n <td>2019-02-14</td>\n <td>Matt Schley</td>\n <td>2.5/5</td>\n <td>rotten</td>\n <td>The film's out-of-touch attempts at humor may find them hunting for the reason the franchise was so popular in the first place.</td>\n </tr>\n </tbody>\n</table>",
8+
"languages": [
9+
"eng"
10+
],
11+
"filetype": "text/csv",
12+
"data_source": {
13+
"url": null,
14+
"version": null,
15+
"record_locator": {
16+
"document_id": "25b75f1d-a2ea-4c97-b75f-1da2eadc97f7"
17+
},
18+
"date_created": null,
19+
"date_modified": null,
20+
"permissions_data": null,
21+
"filesize_bytes": 326
22+
}
23+
}
24+
}
25+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"type": "Table",
4+
"element_id": "54075fb278986da7463a8714d103fe91",
5+
"text": "\n\n\n_id\ntitle\nreviewid\ncreationdate\ncriticname\noriginalscore\nreviewstate\nreviewtext\n\n\n60297eea-73d7-4fca-a97e-ea73d7cfca62\nCity Hunter: Shinjuku Private Eyes\n2590987\n2019-05-28\nReuben Baron\n\nfresh\nThe choreography is so precise and lifelike at points one might wonder whether the movie was rotoscoped, but no live-action reference footage was used. The quality is due to the skill of the animators and Kodama's love for professional wrestling.\n\n\n",
6+
"metadata": {
7+
"text_as_html": "<table border=\"1\" class=\"dataframe\">\n <tbody>\n <tr>\n <td>_id</td>\n <td>title</td>\n <td>reviewid</td>\n <td>creationdate</td>\n <td>criticname</td>\n <td>originalscore</td>\n <td>reviewstate</td>\n <td>reviewtext</td>\n </tr>\n <tr>\n <td>60297eea-73d7-4fca-a97e-ea73d7cfca62</td>\n <td>City Hunter: Shinjuku Private Eyes</td>\n <td>2590987</td>\n <td>2019-05-28</td>\n <td>Reuben Baron</td>\n <td></td>\n <td>fresh</td>\n <td>The choreography is so precise and lifelike at points one might wonder whether the movie was rotoscoped, but no live-action reference footage was used. The quality is due to the skill of the animators and Kodama's love for professional wrestling.</td>\n </tr>\n </tbody>\n</table>",
8+
"languages": [
9+
"eng"
10+
],
11+
"filetype": "text/csv",
12+
"data_source": {
13+
"url": null,
14+
"version": null,
15+
"record_locator": {
16+
"document_id": "60297eea-73d7-4fca-a97e-ea73d7cfca62"
17+
},
18+
"date_created": null,
19+
"date_modified": null,
20+
"permissions_data": null,
21+
"filesize_bytes": 442
22+
}
23+
}
24+
}
25+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"type": "Table",
4+
"element_id": "9b09654fc0b4ed4058a4e2d3a5d16632",
5+
"text": "\n\n\n_id\ntitle\nreviewid\ncreationdate\ncriticname\noriginalscore\nreviewstate\nreviewtext\n\n\n641d99e3-9941-4c18-9d99-e399414c183d\nBeavers\n1145982\n2003-05-23\nIvan M. Lincoln\n3.5/4\nfresh\nTimed to be just long enough for most youngsters' brief attention spans -- and it's packed with plenty of interesting activity, both on land and under the water.\n\n\n",
6+
"metadata": {
7+
"text_as_html": "<table border=\"1\" class=\"dataframe\">\n <tbody>\n <tr>\n <td>_id</td>\n <td>title</td>\n <td>reviewid</td>\n <td>creationdate</td>\n <td>criticname</td>\n <td>originalscore</td>\n <td>reviewstate</td>\n <td>reviewtext</td>\n </tr>\n <tr>\n <td>641d99e3-9941-4c18-9d99-e399414c183d</td>\n <td>Beavers</td>\n <td>1145982</td>\n <td>2003-05-23</td>\n <td>Ivan M. Lincoln</td>\n <td>3.5/4</td>\n <td>fresh</td>\n <td>Timed to be just long enough for most youngsters' brief attention spans -- and it's packed with plenty of interesting activity, both on land and under the water.</td>\n </tr>\n </tbody>\n</table>",
8+
"languages": [
9+
"eng"
10+
],
11+
"filetype": "text/csv",
12+
"data_source": {
13+
"url": null,
14+
"version": null,
15+
"record_locator": {
16+
"document_id": "641d99e3-9941-4c18-9d99-e399414c183d"
17+
},
18+
"date_created": null,
19+
"date_modified": null,
20+
"permissions_data": null,
21+
"filesize_bytes": 338
22+
}
23+
}
24+
}
25+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"type": "Table",
4+
"element_id": "18e1b77953bbbff2822a8c2038971b51",
5+
"text": "\n\n\n_id\ntitle\nreviewid\ncreationdate\ncriticname\noriginalscore\nreviewstate\nreviewtext\n\n\n762c0093-2277-4f3e-ac00-932277af3e0e\nBlood Mask\n1636744\n2007-06-02\nThe Foywonder\n1/5\nrotten\nIt doesn't matter if a movie costs 300 million or only 300 dollars; good is good and bad is bad, and Bloodmask: The Possession of Nicole Lameroux is just plain bad.\n\n\n",
6+
"metadata": {
7+
"text_as_html": "<table border=\"1\" class=\"dataframe\">\n <tbody>\n <tr>\n <td>_id</td>\n <td>title</td>\n <td>reviewid</td>\n <td>creationdate</td>\n <td>criticname</td>\n <td>originalscore</td>\n <td>reviewstate</td>\n <td>reviewtext</td>\n </tr>\n <tr>\n <td>762c0093-2277-4f3e-ac00-932277af3e0e</td>\n <td>Blood Mask</td>\n <td>1636744</td>\n <td>2007-06-02</td>\n <td>The Foywonder</td>\n <td>1/5</td>\n <td>rotten</td>\n <td>It doesn't matter if a movie costs 300 million or only 300 dollars; good is good and bad is bad, and Bloodmask: The Possession of Nicole Lameroux is just plain bad.</td>\n </tr>\n </tbody>\n</table>",
8+
"languages": [
9+
"eng"
10+
],
11+
"filetype": "text/csv",
12+
"data_source": {
13+
"url": null,
14+
"version": null,
15+
"record_locator": {
16+
"document_id": "762c0093-2277-4f3e-ac00-932277af3e0e"
17+
},
18+
"date_created": null,
19+
"date_modified": null,
20+
"permissions_data": null,
21+
"filesize_bytes": 341
22+
}
23+
}
24+
}
25+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[
2+
{
3+
"type": "Table",
4+
"element_id": "66bb3ffa7d2c80ee4a227d8a76fc1777",
5+
"text": "\n\n\n_id\ntitle\nreviewid\ncreationdate\ncriticname\noriginalscore\nreviewstate\nreviewtext\n\n\nae40df94-0b3a-4f89-80df-940b3a6f8966\nDangerous Men\n2504681\n2018-08-29\nPat Padua\n\nfresh\nIts clumsy determination is endearing and sometimes wildly entertaining\n\n\n",
6+
"metadata": {
7+
"text_as_html": "<table border=\"1\" class=\"dataframe\">\n <tbody>\n <tr>\n <td>_id</td>\n <td>title</td>\n <td>reviewid</td>\n <td>creationdate</td>\n <td>criticname</td>\n <td>originalscore</td>\n <td>reviewstate</td>\n <td>reviewtext</td>\n </tr>\n <tr>\n <td>ae40df94-0b3a-4f89-80df-940b3a6f8966</td>\n <td>Dangerous Men</td>\n <td>2504681</td>\n <td>2018-08-29</td>\n <td>Pat Padua</td>\n <td></td>\n <td>fresh</td>\n <td>Its clumsy determination is endearing and sometimes wildly entertaining</td>\n </tr>\n </tbody>\n</table>",
8+
"languages": [
9+
"eng"
10+
],
11+
"filetype": "text/csv",
12+
"data_source": {
13+
"url": null,
14+
"version": null,
15+
"record_locator": {
16+
"document_id": "ae40df94-0b3a-4f89-80df-940b3a6f8966"
17+
},
18+
"date_created": null,
19+
"date_modified": null,
20+
"permissions_data": null,
21+
"filesize_bytes": 241
22+
}
23+
}
24+
}
25+
]

test_e2e/expected-structured-output/astradb/ingest_test_src/25b75f1d-a2ea-4c97-b75f-1da2eadc97f7.json

Lines changed: 0 additions & 98 deletions
This file was deleted.

test_e2e/expected-structured-output/astradb/ingest_test_src/60297eea-73d7-4fca-a97e-ea73d7cfca62.json

Lines changed: 0 additions & 26 deletions
This file was deleted.

test_e2e/expected-structured-output/astradb/ingest_test_src/641d99e3-9941-4c18-9d99-e399414c183d.json

Lines changed: 0 additions & 98 deletions
This file was deleted.

0 commit comments

Comments
 (0)