Skip to content

Commit 1fbc2e9

Browse files
authored
feat/migrate GitHub (#450)
* Add indexer
* add downloader
* bump changelog
* add int test
* drop old e2e test
1 parent 6bd552c commit 1fbc2e9

File tree

11 files changed

+457
-62
lines changed

11 files changed

+457
-62
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
## 0.6.3
2+
3+
### Features
4+
5+
* **Migrate GitHub connector to v2**
6+
17
## 0.6.2
28

9+
### Features
10+
311
* **Support opinionated writes in databricks delta table connector**
412
* **Update databricks volume connector to emit user agent**
513
* **Delete previous content from databricks delta tables**
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"directory_structure": [
3+
"LICENSE.txt",
4+
"test.html"
5+
]
6+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Downloadify: Client Side File Creation
2+
JavaScript + Flash Library
3+
4+
Copyright (c) 2009 Douglas C. Neiner
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in
14+
all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
THE SOFTWARE.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2+
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
3+
<head>
4+
<title>Downloadify</title>
5+
<style type="text/css" media="screen">
6+
body {background: #fff; width: 500px; margin: 20px auto;}
7+
label, input, textarea, h1, h2, p { font-family: Arial, sans-serif; font-size: 12pt;}
8+
input, textarea { border: solid 1px #aaa; padding: 4px; width: 98%;}
9+
label { font-weight: bold;}
10+
h1 { font-size: 30pt; font-weight: bold; letter-spacing: -1px;}
11+
h2 { font-size: 14pt;}
12+
pre { overflow: auto; padding: 10px; background: #222; color: #ccc;}
13+
</style>
14+
<script type="text/javascript" src="js/swfobject.js"></script>
15+
<script type="text/javascript" src="js/downloadify.min.js"></script>
16+
</head>
17+
<body onload="load();">
18+
<h1>Downloadify Example</h1>
19+
<p>More info available at the <a href="http://github.com/dcneiner/Downloadify">Github Project Page</a></p>
20+
<form>
21+
<p>
22+
<label for="filename">Filename</label><br />
23+
<input type="text" name="filename" value="testfile.txt" id="filename" />
24+
</p>
25+
<p>
26+
<label for="data">File Contents</label><br />
27+
<textarea cols="60" rows="10" name="data" id="data">
28+
Whatever you put in this text box will be downloaded and saved in the file. If you leave it blank, no file will be downloaded</textarea>
29+
</p>
30+
<p id="downloadify">
31+
You must have Flash 10 installed to download this file.
32+
</p>
33+
</form>
34+
35+
<script type="text/javascript">
36+
function load(){
37+
Downloadify.create('downloadify',{
38+
filename: function(){
39+
return document.getElementById('filename').value;
40+
},
41+
data: function(){
42+
return document.getElementById('data').value;
43+
},
44+
onComplete: function(){ alert('Your File Has Been Saved!'); },
45+
onCancel: function(){ alert('You have cancelled the saving of this file.'); },
46+
onError: function(){ alert('You must put something in the File Contents or there will be nothing to save!'); },
47+
swf: 'media/downloadify.swf',
48+
downloadImage: 'images/download.png',
49+
width: 100,
50+
height: 30,
51+
transparent: true,
52+
append: false
53+
});
54+
}
55+
</script>
56+
<h2>Downloadify Invoke Script For This Page</h2>
57+
<pre>
58+
Downloadify.create(&#x27;downloadify&#x27;,{
59+
filename: function(){
60+
return document.getElementById(&#x27;filename&#x27;).value;
61+
},
62+
data: function(){
63+
return document.getElementById(&#x27;data&#x27;).value;
64+
},
65+
onComplete: function(){
66+
alert(&#x27;Your File Has Been Saved!&#x27;);
67+
},
68+
onCancel: function(){
69+
alert(&#x27;You have cancelled the saving of this file.&#x27;);
70+
},
71+
onError: function(){
72+
alert(&#x27;You must put something in the File Contents or there will be nothing to save!&#x27;);
73+
},
74+
swf: &#x27;media/downloadify.swf&#x27;,
75+
downloadImage: &#x27;images/download.png&#x27;,
76+
width: 100,
77+
height: 30,
78+
transparent: true,
79+
append: false
80+
});
81+
</pre>
82+
83+
</body>
84+
</html>
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"identifier": "7311a514-9924-571d-827e-db955b96320a",
3+
"connector_type": "github",
4+
"source_identifiers": {
5+
"filename": "LICENSE.txt",
6+
"fullpath": "master/LICENSE.txt",
7+
"rel_path": "LICENSE.txt"
8+
},
9+
"metadata": {
10+
"url": "https://api.github.com/repos/dcneiner/Downloadify/git/blobs/2c4f1ab8689a6dfef4ee7d13d4d935cb6663a7e4",
11+
"version": "W/\"a75db90ed53f327faf43999f4719f8d200fb19cf1db08fe6ace57e88426ec4f4\"",
12+
"record_locator": {},
13+
"date_created": null,
14+
"date_modified": "1739548161.0",
15+
"date_processed": "1743085024.178408",
16+
"permissions_data": [
17+
{
18+
"mode": "100644"
19+
}
20+
],
21+
"filesize_bytes": 1127
22+
},
23+
"additional_metadata": {},
24+
"reprocess": false,
25+
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpd1myoq5u/LICENSE.txt",
26+
"display_name": "https://github.com/dcneiner/Downloadify/blob/master/LICENSE.txt"
27+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"identifier": "7630c7f7-4d81-5ec0-b041-c94abc25e1ee",
3+
"connector_type": "github",
4+
"source_identifiers": {
5+
"filename": "test.html",
6+
"fullpath": "master/test.html",
7+
"rel_path": "test.html"
8+
},
9+
"metadata": {
10+
"url": "https://api.github.com/repos/dcneiner/Downloadify/git/blobs/c63c8fc21d46d44de85a14a3ed4baec0348ce344",
11+
"version": "W/\"a75db90ed53f327faf43999f4719f8d200fb19cf1db08fe6ace57e88426ec4f4\"",
12+
"record_locator": {},
13+
"date_created": null,
14+
"date_modified": "1739548161.0",
15+
"date_processed": "1743085025.2201269",
16+
"permissions_data": [
17+
{
18+
"mode": "100644"
19+
}
20+
],
21+
"filesize_bytes": 3001
22+
},
23+
"additional_metadata": {},
24+
"reprocess": false,
25+
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpd1myoq5u/test.html",
26+
"display_name": "https://github.com/dcneiner/Downloadify/blob/master/test.html"
27+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import os
2+
3+
import pytest
4+
5+
from test.integration.connectors.utils.constants import SOURCE_TAG, UNCATEGORIZED_TAG
6+
from test.integration.connectors.utils.validation.source import (
7+
SourceValidationConfigs,
8+
source_connector_validation,
9+
)
10+
from test.integration.utils import requires_env
11+
from unstructured_ingest.v2.processes.connectors.github import (
12+
CONNECTOR_TYPE,
13+
GithubAccessConfig,
14+
GithubConnectionConfig,
15+
GithubDownloader,
16+
GithubDownloaderConfig,
17+
GithubIndexer,
18+
GithubIndexerConfig,
19+
)
20+
21+
22+
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, UNCATEGORIZED_TAG)
@pytest.mark.asyncio
@requires_env("GH_READ_ONLY_ACCESS_TOKEN")
async def test_github_source(temp_dir):
    """Run the GitHub source connector end to end against ``dcneiner/Downloadify``.

    The test is gated on the ``GH_READ_ONLY_ACCESS_TOKEN`` environment variable
    (via ``requires_env``), which supplies the token used to authenticate.
    The indexer is restricted to ``*.txt`` and ``*.html`` files and the
    downloader writes into ``temp_dir``; the shared source validation helper
    is then asked to confirm that exactly two files were produced and to
    validate the downloaded files themselves.
    """
    # One connection config is shared by the indexer and the downloader.
    github_connection = GithubConnectionConfig(
        access_config=GithubAccessConfig(
            access_token=os.environ["GH_READ_ONLY_ACCESS_TOKEN"],
        ),
        url="dcneiner/Downloadify",
    )

    # Only text and HTML files are indexed; the expected file count below
    # depends on this filter.
    github_indexer = GithubIndexer(
        connection_config=github_connection,
        index_config=GithubIndexerConfig(file_glob=["*.txt", "*.html"]),
    )
    github_downloader = GithubDownloader(
        connection_config=github_connection,
        download_config=GithubDownloaderConfig(download_dir=temp_dir),
    )

    validation_configs = SourceValidationConfigs(
        test_id="github",
        expected_num_files=2,
        validate_downloaded_files=True,
    )

    # Run the source connector validation
    await source_connector_validation(
        indexer=github_indexer,
        downloader=github_downloader,
        configs=validation_configs,
    )

test_e2e/src/github.sh

Lines changed: 0 additions & 60 deletions
This file was deleted.

test_e2e/test-src.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ all_tests=(
2727
# 's3-compression.sh'
2828
'salesforce.sh'
2929
'box.sh'
30-
'github.sh'
3130
'gitlab.sh'
3231
'google-drive.sh'
3332
'wikipedia.sh'

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.6.2" # pragma: no cover
1+
__version__ = "0.6.3" # pragma: no cover

0 commit comments

Comments
 (0)