Skip to content

Commit b60e1d7

Browse files
authored
use black formatting (#266)
1 parent 05b054a commit b60e1d7

File tree

7 files changed

+150
-99
lines changed

7 files changed

+150
-99
lines changed

01_dataframe.ipynb

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,9 @@
162162
"source": [
163163
"import dask.dataframe as dd\n",
164164
"\n",
165-
"ddf = dd.read_csv(os.path.join('data', 'nycflights', '*.csv'),\n",
166-
" parse_dates={'Date': [0, 1, 2]})\n",
165+
"ddf = dd.read_csv(\n",
166+
" os.path.join(\"data\", \"nycflights\", \"*.csv\"), parse_dates={\"Date\": [0, 1, 2]}\n",
167+
")\n",
167168
"ddf"
168169
]
169170
},
@@ -296,11 +297,11 @@
296297
"metadata": {},
297298
"outputs": [],
298299
"source": [
299-
"ddf = dd.read_csv(os.path.join('data', 'nycflights', '*.csv'),\n",
300-
" parse_dates={'Date': [0, 1, 2]},\n",
301-
" dtype={'TailNum': str,\n",
302-
" 'CRSElapsedTime': float,\n",
303-
" 'Cancelled': bool})"
300+
"ddf = dd.read_csv(\n",
301+
" os.path.join(\"data\", \"nycflights\", \"*.csv\"),\n",
302+
" parse_dates={\"Date\": [0, 1, 2]},\n",
303+
" dtype={\"TailNum\": str, \"CRSElapsedTime\": float, \"Cancelled\": bool},\n",
304+
")"
304305
]
305306
},
306307
{
@@ -504,7 +505,7 @@
504505
},
505506
"outputs": [],
506507
"source": [
507-
"ddf[~ddf.Cancelled].groupby('Origin').Origin.count().compute()"
508+
"ddf[~ddf.Cancelled].groupby(\"Origin\").Origin.count().compute()"
508509
]
509510
},
510511
{
@@ -594,7 +595,9 @@
594595
},
595596
"outputs": [],
596597
"source": [
597-
"ddf[\"Distance\"].apply(lambda x: x+1).compute() # don't worry about the warning, we'll discuss in the next sections\n",
598+
"ddf[\"Distance\"].apply(\n",
599+
" lambda x: x + 1\n",
600+
").compute() # don't worry about the warning, we'll discuss in the next sections\n",
598601
"\n",
599602
"# OR\n",
600603
"\n",
@@ -749,7 +752,7 @@
749752
"metadata": {},
750753
"outputs": [],
751754
"source": [
752-
"ddf_jfk = ddf_jfk.persist() # returns back control immediately"
755+
"ddf_jfk = ddf_jfk.persist() # returns back control immediately"
753756
]
754757
},
755758
{
@@ -830,9 +833,12 @@
830833
"def my_custom_converter(df, multiplier=1):\n",
831834
" return df * multiplier\n",
832835
"\n",
836+
"\n",
833837
"meta = pd.Series(name=\"Distance\", dtype=\"float64\")\n",
834838
"\n",
835-
"distance_km = ddf.Distance.map_partitions(my_custom_converter, multiplier=0.6, meta=meta)"
839+
"distance_km = ddf.Distance.map_partitions(\n",
840+
" my_custom_converter, multiplier=0.6, meta=meta\n",
841+
")"
836842
]
837843
},
838844
{

02_array.ipynb

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
"metadata": {},
105105
"outputs": [],
106106
"source": [
107-
"#NumPy array\n",
107+
"# NumPy array\n",
108108
"a_np = np.ones(10)\n",
109109
"a_np"
110110
]
@@ -122,7 +122,7 @@
122122
"metadata": {},
123123
"outputs": [],
124124
"source": [
125-
"a_np_sum = a_np[:5].sum() + a_np[5:].sum() \n",
125+
"a_np_sum = a_np[:5].sum() + a_np[5:].sum()\n",
126126
"a_np_sum"
127127
]
128128
},
@@ -181,7 +181,7 @@
181181
"outputs": [],
182182
"source": [
183183
"# visualize the low level Dask graph using cytoscape\n",
184-
"a_da_sum.visualize(engine=\"cytoscape\")\n"
184+
"a_da_sum.visualize(engine=\"cytoscape\")"
185185
]
186186
},
187187
{
@@ -220,8 +220,8 @@
220220
"metadata": {},
221221
"outputs": [],
222222
"source": [
223-
"%%time \n",
224-
"xn = np.random.normal(10, 0.1, size=(30_000, 30_000)) \n",
223+
"%%time\n",
224+
"xn = np.random.normal(10, 0.1, size=(30_000, 30_000))\n",
225225
"yn = xn.mean(axis=0)\n",
226226
"yn"
227227
]
@@ -258,7 +258,7 @@
258258
"metadata": {},
259259
"outputs": [],
260260
"source": [
261-
"yd = xd.mean(axis=0) \n",
261+
"yd = xd.mean(axis=0)\n",
262262
"yd"
263263
]
264264
},
@@ -270,8 +270,8 @@
270270
"source": [
271271
"%%time\n",
272272
"xd = da.random.normal(10, 0.1, size=(30_000, 30_000), chunks=(3000, 3000))\n",
273-
"yd = xd.mean(axis=0) \n",
274-
"yd.compute() "
273+
"yd = xd.mean(axis=0)\n",
274+
"yd.compute()"
275275
]
276276
},
277277
{
@@ -320,7 +320,7 @@
320320
},
321321
"outputs": [],
322322
"source": [
323-
"x_sum = xd + xd.T \n",
323+
"x_sum = xd + xd.T\n",
324324
"res = x_sum.mean(axis=1)\n",
325325
"res.compute()"
326326
]
@@ -393,7 +393,7 @@
393393
"metadata": {},
394394
"outputs": [],
395395
"source": [
396-
"darr = darr.rechunk({0: -1, 1: 100, 2: 'auto'})"
396+
"darr = darr.rechunk({0: -1, 1: 100, 2: \"auto\"})"
397397
]
398398
},
399399
{
@@ -582,7 +582,7 @@
582582
"outputs": [],
583583
"source": [
584584
"# 1 possible Solution (imitate original). chunks will vary if you are in binder\n",
585-
"c = da.from_zarr(\"data/random_sc.zarr\", chunks=(6250000, ))\n",
585+
"c = da.from_zarr(\"data/random_sc.zarr\", chunks=(6250000,))\n",
586586
"c"
587587
]
588588
},
@@ -671,7 +671,7 @@
671671
"metadata": {},
672672
"outputs": [],
673673
"source": [
674-
"#we will see dashboard activity\n",
674+
"# we will see dashboard activity\n",
675675
"mean.load()"
676676
]
677677
},
@@ -699,7 +699,7 @@
699699
"metadata": {},
700700
"outputs": [],
701701
"source": [
702-
"dair2 = dair.groupby('time.month').mean('time')\n",
702+
"dair2 = dair.groupby(\"time.month\").mean(\"time\")\n",
703703
"dair_new = dair - dair2\n",
704704
"dair_new"
705705
]
@@ -717,7 +717,7 @@
717717
"metadata": {},
718718
"outputs": [],
719719
"source": [
720-
"#things happen in the dashboard\n",
720+
"# things happen in the dashboard\n",
721721
"dair_new.load()"
722722
]
723723
},
@@ -736,7 +736,7 @@
736736
"metadata": {},
737737
"outputs": [],
738738
"source": [
739-
"dair_resample = dair.resample(time='1w').mean('time').std('time')"
739+
"dair_resample = dair.resample(time=\"1w\").mean(\"time\").std(\"time\")"
740740
]
741741
},
742742
{

03_dask.delayed.ipynb

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,12 @@
8888
"source": [
8989
"from time import sleep\n",
9090
"\n",
91+
"\n",
9192
"def inc(x):\n",
9293
" sleep(1)\n",
9394
" return x + 1\n",
9495
"\n",
96+
"\n",
9597
"def add(x, y):\n",
9698
" sleep(1)\n",
9799
" return x + y"
@@ -139,11 +141,13 @@
139141
"source": [
140142
"import dask\n",
141143
"\n",
144+
"\n",
142145
"@dask.delayed\n",
143146
"def inc(x):\n",
144147
" sleep(1)\n",
145148
" return x + 1\n",
146149
"\n",
150+
"\n",
147151
"@dask.delayed\n",
148152
"def add(x, y):\n",
149153
" sleep(1)\n",
@@ -258,15 +262,17 @@
258262
"%%time\n",
259263
"# Sequential code\n",
260264
"\n",
265+
"\n",
261266
"def inc(x):\n",
262267
" sleep(1)\n",
263268
" return x + 1\n",
264269
"\n",
270+
"\n",
265271
"results = []\n",
266272
"for x in data:\n",
267273
" y = inc(x)\n",
268274
" results.append(y)\n",
269-
" \n",
275+
"\n",
270276
"total = sum(results)"
271277
]
272278
},
@@ -305,11 +311,12 @@
305311
" sleep(1)\n",
306312
" return x + 1\n",
307313
"\n",
314+
"\n",
308315
"results = []\n",
309316
"for x in data:\n",
310317
" y = inc(x)\n",
311318
" results.append(y)\n",
312-
" \n",
319+
"\n",
313320
"total = sum(results)\n",
314321
"print(\"Before computing:\", total) # Let's see what type of thing total is\n",
315322
"result = total.compute()\n",
@@ -347,9 +354,11 @@
347354
" sleep(1)\n",
348355
" return 2 * x\n",
349356
"\n",
357+
"\n",
350358
"def is_even(x):\n",
351359
" return not x % 2\n",
352360
"\n",
361+
"\n",
353362
"data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
354363
]
355364
},
@@ -369,7 +378,7 @@
369378
" else:\n",
370379
" y = inc(x)\n",
371380
" results.append(y)\n",
372-
" \n",
381+
"\n",
373382
"total = sum(results)\n",
374383
"print(total)"
375384
]
@@ -402,14 +411,15 @@
402411
" sleep(1)\n",
403412
" return 2 * x\n",
404413
"\n",
414+
"\n",
405415
"results = []\n",
406416
"for x in data:\n",
407417
" if is_even(x): # even\n",
408418
" y = double(x)\n",
409-
" else: # odd\n",
419+
" else: # odd\n",
410420
" y = inc(x)\n",
411421
" results.append(y)\n",
412-
" \n",
422+
"\n",
413423
"total = sum(results)"
414424
]
415425
},
@@ -487,7 +497,8 @@
487497
"outputs": [],
488498
"source": [
489499
"import os\n",
490-
"sorted(os.listdir(os.path.join('data', 'nycflights')))"
500+
"\n",
501+
"sorted(os.listdir(os.path.join(\"data\", \"nycflights\")))"
491502
]
492503
},
493504
{
@@ -504,7 +515,8 @@
504515
"outputs": [],
505516
"source": [
506517
"import pandas as pd\n",
507-
"df = pd.read_csv(os.path.join('data', 'nycflights', '1990.csv'))\n",
518+
"\n",
519+
"df = pd.read_csv(os.path.join(\"data\", \"nycflights\", \"1990.csv\"))\n",
508520
"df.head()"
509521
]
510522
},
@@ -535,7 +547,7 @@
535547
"outputs": [],
536548
"source": [
537549
"# Mean departure delay per-airport for one year\n",
538-
"df.groupby('Origin').DepDelay.mean()"
550+
"df.groupby(\"Origin\").DepDelay.mean()"
539551
]
540552
},
541553
{
@@ -554,7 +566,8 @@
554566
"outputs": [],
555567
"source": [
556568
"from glob import glob\n",
557-
"filenames = sorted(glob(os.path.join('data', 'nycflights', '*.csv')))"
569+
"\n",
570+
"filenames = sorted(glob(os.path.join(\"data\", \"nycflights\", \"*.csv\")))"
558571
]
559572
},
560573
{
@@ -570,16 +583,16 @@
570583
"for fn in filenames:\n",
571584
" # Read in file\n",
572585
" df = pd.read_csv(fn)\n",
573-
" \n",
586+
"\n",
574587
" # Groupby origin airport\n",
575-
" by_origin = df.groupby('Origin')\n",
576-
" \n",
588+
" by_origin = df.groupby(\"Origin\")\n",
589+
"\n",
577590
" # Sum of all departure delays by origin\n",
578591
" total = by_origin.DepDelay.sum()\n",
579-
" \n",
592+
"\n",
580593
" # Number of flights by origin\n",
581594
" count = by_origin.DepDelay.count()\n",
582-
" \n",
595+
"\n",
583596
" # Save the intermediates\n",
584597
" sums.append(total)\n",
585598
" counts.append(count)\n",
@@ -647,26 +660,28 @@
647660
"# This is just one possible solution, there are\n",
648661
"# several ways to do this using `dask.delayed`\n",
649662
"\n",
663+
"\n",
650664
"@dask.delayed\n",
651665
"def read_file(filename):\n",
652666
" # Read in file\n",
653667
" return pd.read_csv(filename)\n",
654668
"\n",
669+
"\n",
655670
"sums = []\n",
656671
"counts = []\n",
657672
"for fn in filenames:\n",
658673
" # Delayed read in file\n",
659674
" df = read_file(fn)\n",
660675
"\n",
661676
" # Groupby origin airport\n",
662-
" by_origin = df.groupby('Origin')\n",
677+
" by_origin = df.groupby(\"Origin\")\n",
663678
"\n",
664679
" # Sum of all departure delays by origin\n",
665680
" total = by_origin.DepDelay.sum()\n",
666681
"\n",
667682
" # Number of flights by origin\n",
668683
" count = by_origin.DepDelay.count()\n",
669-
" \n",
684+
"\n",
670685
" # Save the intermediates\n",
671686
" sums.append(total)\n",
672687
" counts.append(count)\n",

0 commit comments

Comments
 (0)