@@ -25,7 +25,8 @@ def parse_args(args=None):
2525 parser = argparse .ArgumentParser (description = 'Downloads, generates and prepares data for the Dask tutorial.' )
2626 parser .add_argument ('--no-ssl-verify' , dest = 'no_ssl_verify' , action = 'store_true' ,
2727 default = False , help = 'Disables SSL verification.' )
28- parser .add_argument ("--small" , action = "store_true" , help = "Whether to use smaller example datasets." )
28+ parser .add_argument ("--small" , action = "store_true" , default = None ,
29+ help = "Whether to use smaller example datasets. Checks DASK_TUTORIAL_SMALL environment variable if not specified." )
2930 parser .add_argument ("-d" , "--dataset" , choices = DATASETS , help = "Datasets to generate." , default = "all" )
3031
3132 return parser .parse_args (args )
@@ -38,11 +39,13 @@ def parse_args(args=None):
3839 'directory.' )
3940
4041
41- def flights (small = False ):
42+ def flights (small = None ):
4243 start = time .time ()
4344 flights_raw = os .path .join (data_dir , 'nycflights.tar.gz' )
4445 flightdir = os .path .join (data_dir , 'nycflights' )
4546 jsondir = os .path .join (data_dir , 'flightjson' )
47+ if small is None :
48+ small = bool (os .environ .get ("DASK_TUTORIAL_SMALL" , False ))
4649
4750 if small :
4851 N = 500
@@ -86,7 +89,10 @@ def flights(small=False):
8689 end = time .time ()
8790 print ("** Created flights dataset! in {:0.2f}s**" .format (end - start ))
8891
89- def random_array (small = False ):
92+ def random_array (small = None ):
93+ if small is None :
94+ small = bool (os .environ .get ("DASK_TUTORIAL_SMALL" , False ))
95+
9096 if small :
9197 blocksize = 5000
9298 else :
@@ -108,8 +114,11 @@ def random_array(small=False):
108114 print ("Created random data for array exercise in {:0.2f}s" .format (t1 - t0 ))
109115
110116
111- def accounts_csvs (small = False ):
117+ def accounts_csvs (small = None ):
112118 t0 = time .time ()
119+ if small is None :
120+ small = bool (os .environ .get ("DASK_TUTORIAL_SMALL" , False ))
121+
113122 if small :
114123 num_files , n , k = 3 , 10000 , 100
115124 else :
@@ -131,8 +140,11 @@ def accounts_csvs(small=False):
131140 print ("Created CSV acccouts in {:0.2f}s" .format (t1 - t0 ))
132141
133142
134- def accounts_json (small = False ):
143+ def accounts_json (small = None ):
135144 t0 = time .time ()
145+ if small is None :
146+ small = bool (os .environ .get ("DASK_TUTORIAL_SMALL" , False ))
147+
136148 if small :
137149 num_files , n , k = 50 , 10000 , 250
138150 else :
@@ -153,8 +165,11 @@ def accounts_json(small=False):
153165 print ("Created CSV acccouts in {:0.2f}s" .format (t1 - t0 ))
154166
155167
156- def create_weather (small = False ):
168+ def create_weather (small = None ):
157169 t0 = time .time ()
170+ if small is None :
171+ small = bool (os .environ .get ("DASK_TUTORIAL_SMALL" , False ))
172+
158173 if small :
159174 growth = 1
160175 else :
0 commit comments