Skip to content

Commit 429aad6

Browse files
committed
Studio_assignment
1 parent 09a21c4 commit 429aad6

File tree

1 file changed

+307
-10
lines changed

1 file changed

+307
-10
lines changed

data-manipulation/studio/data-manipulation-studio/data-manipulation-studio.ipynb

Lines changed: 307 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
},
2121
{
2222
"cell_type": "code",
23-
"execution_count": 5,
23+
"execution_count": 4,
2424
"id": "66c9a7e7",
2525
"metadata": {},
2626
"outputs": [],
@@ -243,12 +243,218 @@
243243
},
244244
{
245245
"cell_type": "code",
246-
"execution_count": 56,
246+
"execution_count": 6,
247247
"id": "da059f8a",
248248
"metadata": {},
249-
"outputs": [],
249+
"outputs": [
250+
{
251+
"data": {
252+
"text/html": [
253+
"<div>\n",
254+
"<style scoped>\n",
255+
" .dataframe tbody tr th:only-of-type {\n",
256+
" vertical-align: middle;\n",
257+
" }\n",
258+
"\n",
259+
" .dataframe tbody tr th {\n",
260+
" vertical-align: top;\n",
261+
" }\n",
262+
"\n",
263+
" .dataframe thead th {\n",
264+
" text-align: right;\n",
265+
" }\n",
266+
"</style>\n",
267+
"<table border=\"1\" class=\"dataframe\">\n",
268+
" <thead>\n",
269+
" <tr style=\"text-align: right;\">\n",
270+
" <th></th>\n",
271+
" <th>Commodity Name</th>\n",
272+
" <th>City Name</th>\n",
273+
" <th>Type</th>\n",
274+
" <th>Package</th>\n",
275+
" <th>Variety</th>\n",
276+
" <th>Sub Variety</th>\n",
277+
" <th>Grade</th>\n",
278+
" <th>Date</th>\n",
279+
" <th>Low Price</th>\n",
280+
" <th>High Price</th>\n",
281+
" <th>...</th>\n",
282+
" <th>Color</th>\n",
283+
" <th>Environment</th>\n",
284+
" <th>Unit of Sale</th>\n",
285+
" <th>Quality</th>\n",
286+
" <th>Condition</th>\n",
287+
" <th>Appearance</th>\n",
288+
" <th>Storage</th>\n",
289+
" <th>Crop</th>\n",
290+
" <th>Repack</th>\n",
291+
" <th>Trans Mode</th>\n",
292+
" </tr>\n",
293+
" </thead>\n",
294+
" <tbody>\n",
295+
" <tr>\n",
296+
" <th>0</th>\n",
297+
" <td>PUMPKINS</td>\n",
298+
" <td>BALTIMORE</td>\n",
299+
" <td>NaN</td>\n",
300+
" <td>24 inch bins</td>\n",
301+
" <td>NaN</td>\n",
302+
" <td>NaN</td>\n",
303+
" <td>NaN</td>\n",
304+
" <td>04/29/2017</td>\n",
305+
" <td>270</td>\n",
306+
" <td>280.0</td>\n",
307+
" <td>...</td>\n",
308+
" <td>NaN</td>\n",
309+
" <td>NaN</td>\n",
310+
" <td>NaN</td>\n",
311+
" <td>NaN</td>\n",
312+
" <td>NaN</td>\n",
313+
" <td>NaN</td>\n",
314+
" <td>NaN</td>\n",
315+
" <td>NaN</td>\n",
316+
" <td>E</td>\n",
317+
" <td>NaN</td>\n",
318+
" </tr>\n",
319+
" <tr>\n",
320+
" <th>1</th>\n",
321+
" <td>PUMPKINS</td>\n",
322+
" <td>BALTIMORE</td>\n",
323+
" <td>NaN</td>\n",
324+
" <td>24 inch bins</td>\n",
325+
" <td>NaN</td>\n",
326+
" <td>NaN</td>\n",
327+
" <td>NaN</td>\n",
328+
" <td>05/06/2017</td>\n",
329+
" <td>270</td>\n",
330+
" <td>280.0</td>\n",
331+
" <td>...</td>\n",
332+
" <td>NaN</td>\n",
333+
" <td>NaN</td>\n",
334+
" <td>NaN</td>\n",
335+
" <td>NaN</td>\n",
336+
" <td>NaN</td>\n",
337+
" <td>NaN</td>\n",
338+
" <td>NaN</td>\n",
339+
" <td>NaN</td>\n",
340+
" <td>E</td>\n",
341+
" <td>NaN</td>\n",
342+
" </tr>\n",
343+
" <tr>\n",
344+
" <th>2</th>\n",
345+
" <td>PUMPKINS</td>\n",
346+
" <td>BALTIMORE</td>\n",
347+
" <td>NaN</td>\n",
348+
" <td>24 inch bins</td>\n",
349+
" <td>HOWDEN TYPE</td>\n",
350+
" <td>NaN</td>\n",
351+
" <td>NaN</td>\n",
352+
" <td>09/24/2016</td>\n",
353+
" <td>160</td>\n",
354+
" <td>160.0</td>\n",
355+
" <td>...</td>\n",
356+
" <td>NaN</td>\n",
357+
" <td>NaN</td>\n",
358+
" <td>NaN</td>\n",
359+
" <td>NaN</td>\n",
360+
" <td>NaN</td>\n",
361+
" <td>NaN</td>\n",
362+
" <td>NaN</td>\n",
363+
" <td>NaN</td>\n",
364+
" <td>N</td>\n",
365+
" <td>NaN</td>\n",
366+
" </tr>\n",
367+
" <tr>\n",
368+
" <th>3</th>\n",
369+
" <td>PUMPKINS</td>\n",
370+
" <td>BALTIMORE</td>\n",
371+
" <td>NaN</td>\n",
372+
" <td>24 inch bins</td>\n",
373+
" <td>HOWDEN TYPE</td>\n",
374+
" <td>NaN</td>\n",
375+
" <td>NaN</td>\n",
376+
" <td>09/24/2016</td>\n",
377+
" <td>160</td>\n",
378+
" <td>160.0</td>\n",
379+
" <td>...</td>\n",
380+
" <td>NaN</td>\n",
381+
" <td>NaN</td>\n",
382+
" <td>NaN</td>\n",
383+
" <td>NaN</td>\n",
384+
" <td>NaN</td>\n",
385+
" <td>NaN</td>\n",
386+
" <td>NaN</td>\n",
387+
" <td>NaN</td>\n",
388+
" <td>N</td>\n",
389+
" <td>NaN</td>\n",
390+
" </tr>\n",
391+
" <tr>\n",
392+
" <th>4</th>\n",
393+
" <td>PUMPKINS</td>\n",
394+
" <td>BALTIMORE</td>\n",
395+
" <td>NaN</td>\n",
396+
" <td>24 inch bins</td>\n",
397+
" <td>HOWDEN TYPE</td>\n",
398+
" <td>NaN</td>\n",
399+
" <td>NaN</td>\n",
400+
" <td>11/05/2016</td>\n",
401+
" <td>90</td>\n",
402+
" <td>100.0</td>\n",
403+
" <td>...</td>\n",
404+
" <td>NaN</td>\n",
405+
" <td>NaN</td>\n",
406+
" <td>NaN</td>\n",
407+
" <td>NaN</td>\n",
408+
" <td>NaN</td>\n",
409+
" <td>NaN</td>\n",
410+
" <td>NaN</td>\n",
411+
" <td>NaN</td>\n",
412+
" <td>N</td>\n",
413+
" <td>NaN</td>\n",
414+
" </tr>\n",
415+
" </tbody>\n",
416+
"</table>\n",
417+
"<p>5 rows × 25 columns</p>\n",
418+
"</div>"
419+
],
420+
"text/plain": [
421+
" Commodity Name City Name Type Package Variety Sub Variety \\\n",
422+
"0 PUMPKINS BALTIMORE NaN 24 inch bins NaN NaN \n",
423+
"1 PUMPKINS BALTIMORE NaN 24 inch bins NaN NaN \n",
424+
"2 PUMPKINS BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN \n",
425+
"3 PUMPKINS BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN \n",
426+
"4 PUMPKINS BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN \n",
427+
"\n",
428+
" Grade Date Low Price High Price ... Color Environment \\\n",
429+
"0 NaN 04/29/2017 270 280.0 ... NaN NaN \n",
430+
"1 NaN 05/06/2017 270 280.0 ... NaN NaN \n",
431+
"2 NaN 09/24/2016 160 160.0 ... NaN NaN \n",
432+
"3 NaN 09/24/2016 160 160.0 ... NaN NaN \n",
433+
"4 NaN 11/05/2016 90 100.0 ... NaN NaN \n",
434+
"\n",
435+
" Unit of Sale Quality Condition Appearance Storage Crop Repack Trans Mode \n",
436+
"0 NaN NaN NaN NaN NaN NaN E NaN \n",
437+
"1 NaN NaN NaN NaN NaN NaN E NaN \n",
438+
"2 NaN NaN NaN NaN NaN NaN N NaN \n",
439+
"3 NaN NaN NaN NaN NaN NaN N NaN \n",
440+
"4 NaN NaN NaN NaN NaN NaN N NaN \n",
441+
"\n",
442+
"[5 rows x 25 columns]"
443+
]
444+
},
445+
"execution_count": 6,
446+
"metadata": {},
447+
"output_type": "execute_result"
448+
}
449+
],
250450
"source": [
251-
"# Combine the four dataframes into one!\n"
451+
"# Combine the four dataframes into one!\n",
452+
"# Combine all Northeast dataframes\n",
453+
"# ignore_index=True renumbers the combined rows 0..n-1 instead of repeating each city's own index\n",
454+
"northeast = pd.concat([baltimore, boston, newyork, philly], ignore_index=True)\n",
455+
"\n",
456+
"# Show first few rows\n",
457+
"northeast.head()"
252458
]
253459
},
254460
{
@@ -266,22 +472,113 @@
266472
},
267473
{
268474
"cell_type": "code",
269-
"execution_count": 57,
475+
"execution_count": 7,
270476
"id": "c839639a",
271477
"metadata": {},
272-
"outputs": [],
478+
"outputs": [
479+
{
480+
"data": {
481+
"text/html": [
482+
"<div>\n",
483+
"<style scoped>\n",
484+
" .dataframe tbody tr th:only-of-type {\n",
485+
" vertical-align: middle;\n",
486+
" }\n",
487+
"\n",
488+
" .dataframe tbody tr th {\n",
489+
" vertical-align: top;\n",
490+
" }\n",
491+
"\n",
492+
" .dataframe thead th {\n",
493+
" text-align: right;\n",
494+
" }\n",
495+
"</style>\n",
496+
"<table border=\"1\" class=\"dataframe\">\n",
497+
" <thead>\n",
498+
" <tr style=\"text-align: right;\">\n",
499+
" <th></th>\n",
500+
" <th>Low Price</th>\n",
501+
" <th>High Price</th>\n",
502+
" </tr>\n",
503+
" <tr>\n",
504+
" <th>Unit of Sale</th>\n",
505+
" <th></th>\n",
506+
" <th></th>\n",
507+
" </tr>\n",
508+
" </thead>\n",
509+
" <tbody>\n",
510+
" <tr>\n",
511+
" <th>EACH</th>\n",
512+
" <td>47.916667</td>\n",
513+
" <td>59.166667</td>\n",
514+
" </tr>\n",
515+
" <tr>\n",
516+
" <th>PER BIN</th>\n",
517+
" <td>185.845070</td>\n",
518+
" <td>206.619718</td>\n",
519+
" </tr>\n",
520+
" <tr>\n",
521+
" <th>SHELLACKED</th>\n",
522+
" <td>16.000000</td>\n",
523+
" <td>17.545455</td>\n",
524+
" </tr>\n",
525+
" </tbody>\n",
526+
"</table>\n",
527+
"</div>"
528+
],
529+
"text/plain": [
530+
" Low Price High Price\n",
531+
"Unit of Sale \n",
532+
"EACH 47.916667 59.166667\n",
533+
"PER BIN 185.845070 206.619718\n",
534+
"SHELLACKED 16.000000 17.545455"
535+
]
536+
},
537+
"execution_count": 7,
538+
"metadata": {},
539+
"output_type": "execute_result"
540+
}
541+
],
273542
"source": [
274-
"# Put your code here to find the mean low and high prices in the Northeast region for each type of unit of sale.\n"
543+
"# Put your code here to find the mean low and high prices in the Northeast region for each type of unit of sale.\n",
544+
"mean_prices = northeast.groupby(\"Unit of Sale\")[[\"Low Price\", \"High Price\"]].mean()\n",
545+
"\n",
546+
"mean_prices"
275547
]
276548
},
277549
{
278550
"cell_type": "code",
279-
"execution_count": 58,
551+
"execution_count": 8,
280552
"id": "b4b23352",
281553
"metadata": {},
282-
"outputs": [],
554+
"outputs": [
555+
{
556+
"data": {
557+
"text/plain": [
558+
"Variety\n",
559+
"BIG MACK TYPE 55.0\n",
560+
"BLUE TYPE 7.0\n",
561+
"CINDERELLA 39.0\n",
562+
"FAIRYTALE 37.0\n",
563+
"HOWDEN TYPE 224.0\n",
564+
"HOWDEN WHITE TYPE 2.0\n",
565+
"KNUCKLE HEAD 9.0\n",
566+
"MINIATURE 97.0\n",
567+
"MIXED HEIRLOOM VARIETIES 4.0\n",
568+
"PIE TYPE 198.0\n",
569+
"Name: Variety, dtype: float64"
570+
]
571+
},
572+
"execution_count": 8,
573+
"metadata": {},
574+
"output_type": "execute_result"
575+
}
576+
],
283577
"source": [
284-
"# Put your code here to find the average number of pumpkins coming into terminal markets of each variety.\n"
578+
"# Put your code here to find the average number of pumpkins coming into terminal markets of each variety.\n",
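579+
"avg_pumpkins_per_variety = northeast.groupby(\"Variety\")[\"Variety\"].count() / 1  # NOTE(review): this is a row count per variety; '/ 1' only casts it to float and is not an average -- confirm the intended denominator\n",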
580+
"\n",
581+
"avg_pumpkins_per_variety"
285582
]
286583
},
287584
{

0 commit comments

Comments
 (0)