9
9
import random
10
10
import os
11
11
np.random.seed(123456 )
12
- from pandas import *
12
+ from pandas import options
13
13
import pandas as pd
14
- randn = np.random.randn
15
- randint = np.random.randint
16
14
np.set_printoptions(precision = 4 , suppress = True )
17
15
options.display.mpl_style= ' default'
18
16
from pandas.compat import lrange, lzip
19
17
20
18
# ### portions of this were borrowed from the
21
- # ### Pandas cheatsheet
22
- # ### created during the PyData Workshop-Sprint 2012
23
- # ### Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello
19
+ # ### Pandas cheatsheet
20
+ # ### created during the PyData Workshop-Sprint 2012
21
+ # ### Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello
24
22
25
23
26
24
********************
@@ -42,7 +40,7 @@ Object Creation
42
40
43
41
See the :ref:`Data Structure Intro section <dsintro>`
44
42
45
- Creating a ``Series `` by passing a list of values, letting pandas create a default
43
+ Creating a ``Series`` by passing a list of values, letting pandas create a default
46
44
integer index
47
45
48
46
.. ipython :: python
@@ -63,10 +61,10 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
63
61
64
62
.. ipython :: python
65
63
66
- df2 = pd.DataFrame({ ' A' : 1 .,
67
- ' B' : pd.Timestamp(' 20130102' ),
64
+ df2 = pd.DataFrame({ ' A' : 1 .,
65
+ ' B' : pd.Timestamp(' 20130102' ),
68
66
' C' : pd.Series(1 ,index = lrange(4 ),dtype = ' float32' ),
69
- ' D' : np.array([3 ] * 4 ,dtype = ' int32' ),
67
+ ' D' : np.array([3 ] * 4 ,dtype = ' int32' ),
70
68
' E' : ' foo' })
71
69
df2
72
70
@@ -123,7 +121,7 @@ Sorting by values
123
121
Selection
124
122
---------
125
123
126
- .. note ::
124
+ .. note ::
127
125
128
126
While standard Python / Numpy expressions for selecting and setting are
129
127
intuitive and come in handy for interactive work, for production code, we
@@ -248,7 +246,7 @@ error.
248
246
x[4 :10 ]
249
247
x[8 :10 ]
250
248
251
- Pandas will detect this and raise ``IndexError ``, rather than return an empty
249
+ Pandas will detect this and raise ``IndexError``, rather than return an empty
252
250
structure.
253
251
254
252
::
@@ -280,7 +278,7 @@ by the indexes
280
278
281
279
.. ipython :: python
282
280
283
- s1 = pd.Series([1 ,2 ,3 ,4 ,5 ,6 ],index = date_range(' 20130102' ,periods = 6 ))
281
+ s1 = pd.Series([1 ,2 ,3 ,4 ,5 ,6 ],index = pd. date_range(' 20130102' ,periods = 6 ))
284
282
s1
285
283
df[' F' ] = s1
286
284
@@ -401,7 +399,7 @@ See more at :ref:`Histogramming and Discretization <basics.discretization>`
401
399
402
400
.. ipython :: python
403
401
404
- s = Series(np.random.randint(0 ,7 ,size = 10 ))
402
+ s = pd. Series(np.random.randint(0 ,7 ,size = 10 ))
405
403
s
406
404
s.value_counts()
407
405
@@ -412,7 +410,7 @@ See more at :ref:`Vectorized String Methods <basics.string_methods>`
412
410
413
411
.. ipython :: python
414
412
415
- s = Series([' A' , ' B' , ' C' , ' Aaba' , ' Baca' , np.nan, ' CABA' , ' dog' , ' cat' ])
413
+ s = pd. Series([' A' , ' B' , ' C' , ' Aaba' , ' Baca' , np.nan, ' CABA' , ' dog' , ' cat' ])
416
414
s.str.lower()
417
415
418
416
Merge
@@ -428,7 +426,7 @@ operations.
428
426
429
427
See the :ref:`Merging section <merging>`
430
428
431
- Concatenating pandas objects together
429
+ Concatenating pandas objects together
432
430
433
431
.. ipython :: python
434
432
@@ -438,7 +436,7 @@ Concatenating pandas objects together
438
436
# break it into pieces
439
437
pieces = [df[:3 ], df[3 :7 ], df[7 :]]
440
438
441
- concat(pieces)
439
+ pd. concat(pieces)
442
440
443
441
Join
444
442
~~~~
@@ -451,7 +449,7 @@ SQL style merges. See the :ref:`Database style joining <merging.join>`
451
449
right = pd.DataFrame({' key' : [' foo' , ' foo' ], ' rval' : [4 , 5 ]})
452
450
left
453
451
right
454
- merge(left, right, on = ' key' )
452
+ pd. merge(left, right, on = ' key' )
455
453
456
454
Append
457
455
~~~~~~
@@ -484,7 +482,8 @@ See the :ref:`Grouping section <groupby>`
484
482
' foo' , ' bar' , ' foo' , ' foo' ],
485
483
' B' : [' one' , ' one' , ' two' , ' three' ,
486
484
' two' , ' two' , ' one' , ' three' ],
487
- ' C' : randn(8 ), ' D' : randn(8 )})
485
+ ' C' : np.random.randn(8 ),
486
+ ' D' : np.random.randn(8 )})
488
487
df
489
488
490
489
Grouping and then applying a function ``sum`` to the resulting groups.
@@ -493,7 +492,7 @@ Grouping and then applying a function ``sum`` to the resulting groups.
493
492
494
493
df.groupby(' A' ).sum()
495
494
496
- Grouping by multiple columns forms a hierarchical index, which we then apply
495
+ Grouping by multiple columns forms a hierarchical index, which we then apply
497
496
the function.
498
497
499
498
.. ipython :: python
@@ -516,7 +515,7 @@ Stack
516
515
[' one' , ' two' , ' one' , ' two' ,
517
516
' one' , ' two' , ' one' , ' two' ]])
518
517
index = pd.MultiIndex.from_tuples(tuples, names = [' first' , ' second' ])
519
- df = pd.DataFrame(randn(8 , 2 ), index = index, columns = [' A' , ' B' ])
518
+ df = pd.DataFrame(np.random. randn(8 , 2 ), index = index, columns = [' A' , ' B' ])
520
519
df2 = df[:4 ]
521
520
df2
522
521
@@ -543,18 +542,18 @@ See the section on :ref:`Pivot Tables <reshaping.pivot>`.
543
542
544
543
.. ipython :: python
545
544
546
- df = DataFrame({' A' : [' one' , ' one' , ' two' , ' three' ] * 3 ,
547
- ' B' : [' A' , ' B' , ' C' ] * 4 ,
548
- ' C' : [' foo' , ' foo' , ' foo' , ' bar' , ' bar' , ' bar' ] * 2 ,
549
- ' D' : np.random.randn(12 ),
550
- ' E' : np.random.randn(12 )})
545
+ df = pd. DataFrame({' A' : [' one' , ' one' , ' two' , ' three' ] * 3 ,
546
+ ' B' : [' A' , ' B' , ' C' ] * 4 ,
547
+ ' C' : [' foo' , ' foo' , ' foo' , ' bar' , ' bar' , ' bar' ] * 2 ,
548
+ ' D' : np.random.randn(12 ),
549
+ ' E' : np.random.randn(12 )})
551
550
df
552
551
553
552
We can produce pivot tables from this data very easily:
554
553
555
554
.. ipython :: python
556
555
557
- pivot_table(df, values = ' D' , rows = [' A' , ' B' ], cols = [' C' ])
556
+ pd. pivot_table(df, values = ' D' , rows = [' A' , ' B' ], cols = [' C' ])
558
557
559
558
560
559
Time Series
@@ -568,15 +567,15 @@ financial applications. See the :ref:`Time Series section <timeseries>`
568
567
.. ipython :: python
569
568
570
569
rng = pd.date_range(' 1/1/2012' , periods = 100 , freq = ' S' )
571
- ts = pd.Series(randint(0 , 500 , len (rng)), index = rng)
570
+ ts = pd.Series(np.random. randint(0 , 500 , len (rng)), index = rng)
572
571
ts.resample(' 5Min' , how = ' sum' )
573
572
574
573
Time zone representation
575
574
576
575
.. ipython :: python
577
576
578
577
rng = pd.date_range(' 3/6/2012 00:00' , periods = 5 , freq = ' D' )
579
- ts = pd.Series(randn(len (rng)), rng)
578
+ ts = pd.Series(np.random. randn(len (rng)), rng)
580
579
ts_utc = ts.tz_localize(' UTC' )
581
580
ts_utc
582
581
@@ -591,7 +590,7 @@ Converting between time span representations
591
590
.. ipython :: python
592
591
593
592
rng = pd.date_range(' 1/1/2012' , periods = 5 , freq = ' M' )
594
- ts = pd.Series(randn(len (rng)), index = rng)
593
+ ts = pd.Series(np.random. randn(len (rng)), index = rng)
595
594
ts
596
595
ps = ts.to_period()
597
596
ps
@@ -604,8 +603,8 @@ the quarter end:
604
603
605
604
.. ipython :: python
606
605
607
- prng = period_range(' 1990Q1' , ' 2000Q4' , freq = ' Q-NOV' )
608
- ts = Series(randn(len (prng)), prng)
606
+ prng = pd. period_range(' 1990Q1' , ' 2000Q4' , freq = ' Q-NOV' )
607
+ ts = pd. Series(np.random. randn(len (prng)), prng)
609
608
ts.index = (prng.asfreq(' M' , ' e' ) + 1 ).asfreq(' H' , ' s' ) + 9
610
609
ts.head()
611
610
@@ -624,7 +623,7 @@ Plotting
624
623
625
624
.. ipython :: python
626
625
627
- ts = pd.Series(randn(1000 ), index = pd.date_range(' 1/1/2000' , periods = 1000 ))
626
+ ts = pd.Series(np.random. randn(1000 ), index = pd.date_range(' 1/1/2000' , periods = 1000 ))
628
627
ts = ts.cumsum()
629
628
630
629
@savefig series_plot_basic.png
@@ -634,7 +633,7 @@ On DataFrame, ``plot`` is a convenience to plot all of the columns with labels:
634
633
635
634
.. ipython :: python
636
635
637
- df = pd.DataFrame(randn(1000 , 4 ), index = ts.index,
636
+ df = pd.DataFrame(np.random. randn(1000 , 4 ), index = ts.index,
638
637
columns = [' A' , ' B' , ' C' , ' D' ])
639
638
df = df.cumsum()
640
639
@@ -679,7 +678,7 @@ Reading from a HDF5 Store
679
678
680
679
.. ipython :: python
681
680
682
- read_hdf(' foo.h5' ,' df' )
681
+ pd. read_hdf(' foo.h5' ,' df' )
683
682
684
683
.. ipython :: python
685
684
:suppress:
@@ -701,7 +700,7 @@ Reading from an excel file
701
700
702
701
.. ipython :: python
703
702
704
- read_excel(' foo.xlsx' , ' sheet1' , index_col = None , na_values = [' NA' ])
703
+ pd. read_excel(' foo.xlsx' , ' sheet1' , index_col = None , na_values = [' NA' ])
705
704
706
705
.. ipython :: python
707
706
:suppress:
0 commit comments