Skip to content

Commit 281059b

Browse files
Merge pull request #145 from TomAugspurger/small-env
Set environment variable for small datasets
2 parents 3043db5 + 0403e4a commit 281059b

File tree

2 files changed

+22
-6
lines changed

2 files changed

+22
-6
lines changed

binder/start

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,6 @@ sed -i -e "s|DASK_DASHBOARD_URL|/user/${JUPYTERHUB_USER}/proxy/8787|g" binder/ju
55

66
# Import the workspace
77
jupyter lab workspaces import binder/jupyterlab-workspace.json
8+
export DASK_TUTORIAL_SMALL=1
89

910
exec "$@"

prep.py

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ def parse_args(args=None):
2525
parser = argparse.ArgumentParser(description='Downloads, generates and prepares data for the Dask tutorial.')
2626
parser.add_argument('--no-ssl-verify', dest='no_ssl_verify', action='store_true',
2727
default=False, help='Disables SSL verification.')
28-
parser.add_argument("--small", action="store_true", help="Whether to use smaller example datasets.")
28+
parser.add_argument("--small", action="store_true", default=None,
29+
help="Whether to use smaller example datasets. Checks DASK_TUTORIAL_SMALL environment variable if not specified.")
2930
parser.add_argument("-d", "--dataset", choices=DATASETS, help="Datasets to generate.", default="all")
3031

3132
return parser.parse_args(args)
@@ -38,11 +39,13 @@ def parse_args(args=None):
3839
'directory.')
3940

4041

41-
def flights(small=False):
42+
def flights(small=None):
4243
start = time.time()
4344
flights_raw = os.path.join(data_dir, 'nycflights.tar.gz')
4445
flightdir = os.path.join(data_dir, 'nycflights')
4546
jsondir = os.path.join(data_dir, 'flightjson')
47+
if small is None:
48+
small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False))
4649

4750
if small:
4851
N = 500
@@ -86,7 +89,10 @@ def flights(small=False):
8689
end = time.time()
8790
print("** Created flights dataset! in {:0.2f}s**".format(end - start))
8891

89-
def random_array(small=False):
92+
def random_array(small=None):
93+
if small is None:
94+
small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False))
95+
9096
if small:
9197
blocksize = 5000
9298
else:
@@ -108,8 +114,11 @@ def random_array(small=False):
108114
print("Created random data for array exercise in {:0.2f}s".format(t1 - t0))
109115

110116

111-
def accounts_csvs(small=False):
117+
def accounts_csvs(small=None):
112118
t0 = time.time()
119+
if small is None:
120+
small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False))
121+
113122
if small:
114123
num_files, n, k = 3, 10000, 100
115124
else:
@@ -131,8 +140,11 @@ def accounts_csvs(small=False):
131140
print("Created CSV acccouts in {:0.2f}s".format(t1 - t0))
132141

133142

134-
def accounts_json(small=False):
143+
def accounts_json(small=None):
135144
t0 = time.time()
145+
if small is None:
146+
small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False))
147+
136148
if small:
137149
num_files, n, k = 50, 10000, 250
138150
else:
@@ -153,8 +165,11 @@ def accounts_json(small=False):
153165
print("Created CSV acccouts in {:0.2f}s".format(t1 - t0))
154166

155167

156-
def create_weather(small=False):
168+
def create_weather(small=None):
157169
t0 = time.time()
170+
if small is None:
171+
small = bool(os.environ.get("DASK_TUTORIAL_SMALL", False))
172+
158173
if small:
159174
growth = 1
160175
else:

0 commit comments

Comments
 (0)