
Commit abf4915

Merge pull request #173 from mmccarty/data-sources
Configurable data source locations
2 parents: 5182b1d + 05e3c17

File tree

4 files changed: +30 −3 lines changed


01x_lazy.ipynb

Lines changed: 12 additions & 1 deletion
@@ -412,6 +412,17 @@
     "Similarly, we can launch Python processes and threads in the background. Some methods allow mapping over multiple inputs and gathering the results, more on that later. The thread starts and the cell completes immediately, but the data associated with the download only appears in the queue object some time later."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Edit sources.py to configure source locations\n",
+    "import sources\n",
+    "sources.lazy_url"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -428,7 +439,7 @@
     "    q.put(u.read())\n",
     "\n",
     "q = queue.Queue()\n",
-    "t = threading.Thread(target=get_webdata, args=('http://www.google.com', q))\n",
+    "t = threading.Thread(target=get_webdata, args=(sources.lazy_url, q))\n",
     "t.start()"
    ]
   },
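
For readers following along, here is a minimal self-contained sketch of the background-download pattern this cell feeds into. The body of `get_webdata` is an assumption: the hunk above only shows the `q.put(u.read())` line, so the `urlopen` call is inferred from it.

    import queue
    import threading
    from urllib.request import urlopen

    import sources  # the new module added in this commit

    def get_webdata(url, q):
        # Assumed body; the diff only shows the q.put(u.read()) line.
        # Download the page and hand the raw bytes to the queue.
        u = urlopen(url)
        q.put(u.read())

    q = queue.Queue()
    t = threading.Thread(target=get_webdata, args=(sources.lazy_url, q))
    t.start()       # returns immediately
    data = q.get()  # blocks until the background download finishes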

02_bag.ipynb

Lines changed: 12 additions & 1 deletion
@@ -120,6 +120,17 @@
     "b.take(1)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Edit sources.py to configure source locations\n",
+    "import sources\n",
+    "sources.bag_url"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -128,7 +139,7 @@
    "source": [
     "# Requires `s3fs` library\n",
     "# each partition is a remote CSV text file\n",
-    "b = db.read_text('s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv',\n",
+    "b = db.read_text(sources.bag_url,\n",
     "                 storage_options={'anon': True})\n",
     "b.take(1)"
    ]
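
As a usage note, the reconfigured cell boils down to the sketch below, assuming `sources.bag_url` still points at the public NYC taxi CSV defined in sources.py and that the `dask` and `s3fs` libraries are installed:

    import dask.bag as db

    import sources

    # Each partition of the bag is one remote CSV text file;
    # anon=True reads the public bucket without AWS credentials.
    b = db.read_text(sources.bag_url, storage_options={'anon': True})
    b.take(1)  # fetch the first line of the first partition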

prep.py

Lines changed: 3 additions & 1 deletion
@@ -15,6 +15,8 @@
 
 from accounts import account_entries, account_params, json_entries
 
+import sources
+
 DATASETS = ["random", "weather", "accounts", "flights", "all"]
 here = os.path.dirname(__file__)
 data_dir = os.path.abspath(os.path.join(here, 'data'))
@@ -53,7 +55,7 @@ def flights(small=None):
 
     if not os.path.exists(flights_raw):
         print("- Downloading NYC Flights dataset... ", end='', flush=True)
-        url = "https://storage.googleapis.com/dask-tutorial-data/nycflights.tar.gz"
+        url = sources.flights_url
         urllib.request.urlretrieve(url, flights_raw)
         print("done", flush=True)
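
In isolation, the download logic now reads as in the sketch below; `flights_raw` is defined elsewhere in prep.py, so the path used here is a hypothetical stand-in:

    import os
    import urllib.request

    import sources

    flights_raw = os.path.join('data', 'nycflights.tar.gz')  # hypothetical path
    os.makedirs('data', exist_ok=True)

    if not os.path.exists(flights_raw):
        print("- Downloading NYC Flights dataset... ", end='', flush=True)
        # The URL now comes from sources.py instead of being hard-coded.
        urllib.request.urlretrieve(sources.flights_url, flights_raw)
        print("done", flush=True)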

sources.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+flights_url = "https://storage.googleapis.com/dask-tutorial-data/nycflights.tar.gz"
+lazy_url = "http://www.google.com"
+bag_url = "s3://dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv"
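
Since the notebooks and prep.py now all import this one module, retargeting the tutorial at mirrored or offline data is a single-file edit. A hypothetical example, with all three replacement locations invented for illustration (note the bag notebook's `storage_options={'anon': True}` may need adjusting for non-S3 paths):

    # sources.py, edited for an offline workshop
    flights_url = "file:///opt/workshop-data/nycflights.tar.gz"  # local mirror
    lazy_url = "http://localhost:8000/"                          # any reachable page works
    bag_url = "data/yellow_tripdata_2015-01.csv"                 # local CSV instead of S3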
